blob: 4edc86cceda6c8d5a9c9c03a2acbd260a68d956d [file] [log] [blame]
Jingyue Wud7966ff2015-02-03 19:37:06 +00001//===-- StraightLineStrengthReduce.cpp - ------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements straight-line strength reduction (SLSR). Unlike loop
11// strength reduction, this algorithm is designed to reduce arithmetic
12// redundancy in straight-line code instead of loops. It has proven to be
13// effective in simplifying arithmetic statements derived from an unrolled loop.
14// It can also simplify the logic of SeparateConstOffsetFromGEP.
15//
16// There are many optimizations we can perform in the domain of SLSR. This file
17// for now contains only an initial step. Specifically, we look for strength
18// reduction candidate in the form of
19//
20// (B + i) * S
21//
22// where B and S are integer constants or variables, and i is a constant
23// integer. If we found two such candidates
24//
25// S1: X = (B + i) * S S2: Y = (B + i') * S
26//
27// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with
28//
29// Y = X + (i' - i) * S
30//
31// where (i' - i) * S is folded to the extent possible. When S2 has multiple
32// bases, we pick the one that is closest to S2, or S2's "immediate" basis.
33//
34// TODO:
35//
36// - Handle candidates in the form of B + i * S
37//
38// - Handle candidates in the form of pointer arithmetics. e.g., B[i * S]
39//
40// - Floating point arithmetics when fast math is enabled.
41//
42// - SLSR may decrease ILP at the architecture level. Targets that are very
43// sensitive to ILP may want to disable it. Having SLSR to consider ILP is
44// left as future work.
45#include <vector>
46
47#include "llvm/ADT/DenseSet.h"
48#include "llvm/IR/Dominators.h"
49#include "llvm/IR/IRBuilder.h"
50#include "llvm/IR/Module.h"
51#include "llvm/IR/PatternMatch.h"
52#include "llvm/Support/raw_ostream.h"
53#include "llvm/Transforms/Scalar.h"
54
55using namespace llvm;
56using namespace PatternMatch;
57
58namespace {
59
60class StraightLineStrengthReduce : public FunctionPass {
61 public:
62 // SLSR candidate. Such a candidate must be in the form of
63 // (Base + Index) * Stride
64 struct Candidate : public ilist_node<Candidate> {
65 Candidate(Value *B = nullptr, ConstantInt *Idx = nullptr,
66 Value *S = nullptr, Instruction *I = nullptr)
67 : Base(B), Index(Idx), Stride(S), Ins(I), Basis(nullptr) {}
68 Value *Base;
69 ConstantInt *Index;
70 Value *Stride;
71 // The instruction this candidate corresponds to. It helps us to rewrite a
72 // candidate with respect to its immediate basis. Note that one instruction
73 // can corresponds to multiple candidates depending on how you associate the
74 // expression. For instance,
75 //
76 // (a + 1) * (b + 2)
77 //
78 // can be treated as
79 //
80 // <Base: a, Index: 1, Stride: b + 2>
81 //
82 // or
83 //
84 // <Base: b, Index: 2, Stride: a + 1>
85 Instruction *Ins;
86 // Points to the immediate basis of this candidate, or nullptr if we cannot
87 // find any basis for this candidate.
88 Candidate *Basis;
89 };
90
91 static char ID;
92
93 StraightLineStrengthReduce() : FunctionPass(ID), DT(nullptr) {
94 initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry());
95 }
96
97 void getAnalysisUsage(AnalysisUsage &AU) const override {
98 AU.addRequired<DominatorTreeWrapperPass>();
99 // We do not modify the shape of the CFG.
100 AU.setPreservesCFG();
101 }
102
103 bool runOnFunction(Function &F) override;
104
105 private:
106 // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
107 // share the same base and stride.
108 bool isBasisFor(const Candidate &Basis, const Candidate &C);
109 // Checks whether I is in a candidate form. If so, adds all the matching forms
110 // to Candidates, and tries to find the immediate basis for each of them.
111 void allocateCandidateAndFindBasis(Instruction *I);
112 // Given that I is in the form of "(B + Idx) * S", adds this form to
113 // Candidates, and finds its immediate basis.
114 void allocateCandidateAndFindBasis(Value *B, ConstantInt *Idx, Value *S,
115 Instruction *I);
116 // Rewrites candidate C with respect to Basis.
117 void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
118
119 DominatorTree *DT;
120 ilist<Candidate> Candidates;
121 // Temporarily holds all instructions that are unlinked (but not deleted) by
122 // rewriteCandidateWithBasis. These instructions will be actually removed
123 // after all rewriting finishes.
124 DenseSet<Instruction *> UnlinkedInstructions;
125};
126} // anonymous namespace
127
128char StraightLineStrengthReduce::ID = 0;
129INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
130 "Straight line strength reduction", false, false)
131INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
132INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
133 "Straight line strength reduction", false, false)
134
135FunctionPass *llvm::createStraightLineStrengthReducePass() {
136 return new StraightLineStrengthReduce();
137}
138
139bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
140 const Candidate &C) {
141 return (Basis.Ins != C.Ins && // skip the same instruction
142 // Basis must dominate C in order to rewrite C with respect to Basis.
143 DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
144 // They share the same base and stride.
145 Basis.Base == C.Base &&
146 Basis.Stride == C.Stride);
147}
148
149// TODO: We currently implement an algorithm whose time complexity is linear to
150// the number of existing candidates. However, a better algorithm exists. We
151// could depth-first search the dominator tree, and maintain a hash table that
152// contains all candidates that dominate the node being traversed. This hash
153// table is indexed by the base and the stride of a candidate. Therefore,
154// finding the immediate basis of a candidate boils down to one hash-table look
155// up.
156void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B,
157 ConstantInt *Idx,
158 Value *S,
159 Instruction *I) {
160 Candidate C(B, Idx, S, I);
161 // Try to compute the immediate basis of C.
162 unsigned NumIterations = 0;
163 // Limit the scan radius to avoid running forever.
Aaron Ballman34c325e2015-02-04 14:01:08 +0000164 static const unsigned MaxNumIterations = 50;
Jingyue Wud7966ff2015-02-03 19:37:06 +0000165 for (auto Basis = Candidates.rbegin();
166 Basis != Candidates.rend() && NumIterations < MaxNumIterations;
167 ++Basis, ++NumIterations) {
168 if (isBasisFor(*Basis, C)) {
169 C.Basis = &(*Basis);
170 break;
171 }
172 }
173 // Regardless of whether we find a basis for C, we need to push C to the
174 // candidate list.
175 Candidates.push_back(C);
176}
177
178void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) {
179 Value *B = nullptr;
180 ConstantInt *Idx = nullptr;
181 // "(Base + Index) * Stride" must be a Mul instruction at the first hand.
182 if (I->getOpcode() == Instruction::Mul) {
183 if (IntegerType *ITy = dyn_cast<IntegerType>(I->getType())) {
184 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
185 for (unsigned Swapped = 0; Swapped < 2; ++Swapped) {
186 // Only handle the canonical operand ordering.
187 if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) {
188 // If LHS is in the form of "Base + Index", then I is in the form of
189 // "(Base + Index) * RHS".
190 allocateCandidateAndFindBasis(B, Idx, RHS, I);
191 } else {
192 // Otherwise, at least try the form (LHS + 0) * RHS.
193 allocateCandidateAndFindBasis(LHS, ConstantInt::get(ITy, 0), RHS, I);
194 }
195 // Swap LHS and RHS so that we also cover the cases where LHS is the
196 // stride.
197 if (LHS == RHS)
198 break;
199 std::swap(LHS, RHS);
200 }
201 }
202 }
203}
204
205void StraightLineStrengthReduce::rewriteCandidateWithBasis(
206 const Candidate &C, const Candidate &Basis) {
207 // An instruction can correspond to multiple candidates. Therefore, instead of
208 // simply deleting an instruction when we rewrite it, we mark its parent as
209 // nullptr (i.e. unlink it) so that we can skip the candidates whose
210 // instruction is already rewritten.
211 if (!C.Ins->getParent())
212 return;
213 assert(C.Base == Basis.Base && C.Stride == Basis.Stride);
214 // Basis = (B + i) * S
215 // C = (B + i') * S
216 // ==>
217 // C = Basis + (i' - i) * S
218 IRBuilder<> Builder(C.Ins);
219 ConstantInt *IndexOffset = ConstantInt::get(
220 C.Ins->getContext(), C.Index->getValue() - Basis.Index->getValue());
221 Value *Reduced;
222 // TODO: preserve nsw/nuw in some cases.
223 if (IndexOffset->isOne()) {
224 // If (i' - i) is 1, fold C into Basis + S.
225 Reduced = Builder.CreateAdd(Basis.Ins, C.Stride);
226 } else if (IndexOffset->isMinusOne()) {
227 // If (i' - i) is -1, fold C into Basis - S.
228 Reduced = Builder.CreateSub(Basis.Ins, C.Stride);
229 } else {
230 Value *Bump = Builder.CreateMul(C.Stride, IndexOffset);
231 Reduced = Builder.CreateAdd(Basis.Ins, Bump);
232 }
233 Reduced->takeName(C.Ins);
234 C.Ins->replaceAllUsesWith(Reduced);
235 C.Ins->dropAllReferences();
236 // Unlink C.Ins so that we can skip other candidates also corresponding to
237 // C.Ins. The actual deletion is postponed to the end of runOnFunction.
238 C.Ins->removeFromParent();
239 UnlinkedInstructions.insert(C.Ins);
240}
241
242bool StraightLineStrengthReduce::runOnFunction(Function &F) {
243 if (skipOptnoneFunction(F))
244 return false;
245
246 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
247 // Traverse the dominator tree in the depth-first order. This order makes sure
248 // all bases of a candidate are in Candidates when we process it.
249 for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
250 node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
251 BasicBlock *B = node->getBlock();
252 for (auto I = B->begin(); I != B->end(); ++I) {
253 allocateCandidateAndFindBasis(I);
254 }
255 }
256
257 // Rewrite candidates in the reverse depth-first order. This order makes sure
258 // a candidate being rewritten is not a basis for any other candidate.
259 while (!Candidates.empty()) {
260 const Candidate &C = Candidates.back();
261 if (C.Basis != nullptr) {
262 rewriteCandidateWithBasis(C, *C.Basis);
263 }
264 Candidates.pop_back();
265 }
266
267 // Delete all unlink instructions.
268 for (auto I : UnlinkedInstructions) {
269 delete I;
270 }
271 bool Ret = !UnlinkedInstructions.empty();
272 UnlinkedInstructions.clear();
273 return Ret;
274}