blob: 15a4e8e3c64a9f92f6190aec8a8e4b88b6eed330 [file] [log] [blame]
Dan Gohman0a40ad92009-04-16 03:18:22 +00001//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
Misha Brukmanb1c93172005-04-21 23:48:37 +00002//
Nate Begemanb18121e2004-10-18 21:08:22 +00003// The LLVM Compiler Infrastructure
4//
Chris Lattnerf3ebc3f2007-12-29 20:36:04 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Misha Brukmanb1c93172005-04-21 23:48:37 +00007//
Nate Begemanb18121e2004-10-18 21:08:22 +00008//===----------------------------------------------------------------------===//
9//
Dan Gohman97f70ad2009-05-19 20:37:36 +000010// This transformation analyzes and transforms the induction variables (and
11// computations derived from them) into forms suitable for efficient execution
12// on the target.
13//
Nate Begemanb18121e2004-10-18 21:08:22 +000014// This pass performs a strength reduction on array references inside loops that
Dan Gohman97f70ad2009-05-19 20:37:36 +000015// have as one or more of their components the loop induction variable, it
16// rewrites expressions to take advantage of scaled-index addressing modes
17// available on the target, and it performs a variety of other optimizations
18// related to loop induction variables.
Nate Begemanb18121e2004-10-18 21:08:22 +000019//
Dan Gohman45774ce2010-02-12 10:34:29 +000020// Terminology note: this code has a lot of handling for "post-increment" or
21// "post-inc" users. This is not talking about post-increment addressing modes;
22// it is instead talking about code like this:
23//
24// %i = phi [ 0, %entry ], [ %i.next, %latch ]
25// ...
26// %i.next = add %i, 1
27// %c = icmp eq %i.next, %n
28//
29// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30// it's useful to think about these as the same register, with some uses using
31// the value of the register before the add and some using it after. In this
32// example, the icmp is a post-increment user, since it uses %i.next, which is
33// the value of the induction variable after the increment. The other common
34// case of post-increment users is users outside the loop.
35//
36// TODO: More sophistication in the way Formulae are generated and filtered.
37//
38// TODO: Handle multiple loops at a time.
39//
Chandler Carruth26c59fa2013-01-07 14:41:08 +000040// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
41// of a GlobalValue?
Dan Gohman45774ce2010-02-12 10:34:29 +000042//
43// TODO: When truncation is free, truncate ICmp users' operands to make it a
44// smaller encoding (on x86 at least).
45//
46// TODO: When a negated register is used by an add (such as in a list of
47// multiple base registers, or as the increment expression in an addrec),
48// we may not actually need both reg and (-1 * reg) in registers; the
49// negation can be implemented by using a sub instead of an add. The
50// lack of support for taking this into consideration when making
51// register pressure decisions is partly worked around by the "Special"
52// use kind.
53//
Nate Begemanb18121e2004-10-18 21:08:22 +000054//===----------------------------------------------------------------------===//
55
Chandler Carruthed0881b2012-12-03 16:50:05 +000056#include "llvm/Transforms/Scalar.h"
57#include "llvm/ADT/DenseSet.h"
Benjamin Kramer62fb0cf2014-03-15 17:17:48 +000058#include "llvm/ADT/Hashing.h"
Chandler Carruth8a8cd2b2014-01-07 11:48:04 +000059#include "llvm/ADT/STLExtras.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000060#include "llvm/ADT/SetVector.h"
61#include "llvm/ADT/SmallBitVector.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000062#include "llvm/Analysis/IVUsers.h"
Devang Patelb0743b52007-03-06 21:14:09 +000063#include "llvm/Analysis/LoopPass.h"
Nate Begemane68bcd12005-07-30 00:15:07 +000064#include "llvm/Analysis/ScalarEvolutionExpander.h"
Chandler Carruth26c59fa2013-01-07 14:41:08 +000065#include "llvm/Analysis/TargetTransformInfo.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000066#include "llvm/IR/Constants.h"
67#include "llvm/IR/DerivedTypes.h"
Chandler Carruth5ad5f152014-01-13 09:26:24 +000068#include "llvm/IR/Dominators.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000069#include "llvm/IR/Instructions.h"
70#include "llvm/IR/IntrinsicInst.h"
Chandler Carruth4220e9c2014-03-04 11:17:44 +000071#include "llvm/IR/ValueHandle.h"
Andrew Trick58124392011-09-27 00:44:14 +000072#include "llvm/Support/CommandLine.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000073#include "llvm/Support/Debug.h"
Daniel Dunbar6115b392009-07-26 09:48:23 +000074#include "llvm/Support/raw_ostream.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000075#include "llvm/Transforms/Utils/BasicBlockUtils.h"
76#include "llvm/Transforms/Utils/Local.h"
Jeff Cohenc5009912005-07-30 18:22:27 +000077#include <algorithm>
Nate Begemanb18121e2004-10-18 21:08:22 +000078using namespace llvm;
79
Chandler Carruth964daaa2014-04-22 02:55:47 +000080#define DEBUG_TYPE "loop-reduce"
81
Andrew Trick19f80c12012-04-18 04:00:10 +000082/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
83/// bail out. This threshold is far beyond the number of users that LSR can
84/// conceivably solve, so it should not affect generated code, but catches the
85/// worst cases before LSR burns too much compile time and stack space.
86static const unsigned MaxIVUsers = 200;
87
Andrew Trickecbe22b2011-10-11 02:30:45 +000088// Flag to clean up congruent phis after LSR phi expansion.
89// It was introduced as a temporary, disabled-by-default switch to be removed
90// after the v3.0 release, pending evidence of whether it is truly useful.
Andrew Trick06f6c052012-01-07 07:08:17 +000091// It is now enabled by default because it is needed for ivchains.
Benjamin Kramer7ba71be2011-11-26 23:01:57 +000092static cl::opt<bool> EnablePhiElim(
Andrew Trick06f6c052012-01-07 07:08:17 +000093 "enable-lsr-phielim", cl::Hidden, cl::init(true),
94 cl::desc("Enable LSR phi elimination"));
Andrew Trick58124392011-09-27 00:44:14 +000095
Andrew Trick248d4102012-01-09 21:18:52 +000096#ifndef NDEBUG
97// Stress test IV chain generation.
98static cl::opt<bool> StressIVChain(
99 "stress-ivchain", cl::Hidden, cl::init(false),
100 cl::desc("Stress test LSR IV chains"));
101#else
102static bool StressIVChain = false;
103#endif
104
Dan Gohman45774ce2010-02-12 10:34:29 +0000105namespace {
Nate Begemanb18121e2004-10-18 21:08:22 +0000106
Dan Gohman45774ce2010-02-12 10:34:29 +0000107/// RegSortData - This class holds data which is used to order reuse candidates.
108class RegSortData {
109public:
110 /// UsedByIndices - This represents the set of LSRUse indices which reference
111 /// a particular register.
112 SmallBitVector UsedByIndices;
113
114 RegSortData() {}
115
116 void print(raw_ostream &OS) const;
117 void dump() const;
118};
119
120}
121
122void RegSortData::print(raw_ostream &OS) const {
123 OS << "[NumUses=" << UsedByIndices.count() << ']';
124}
125
Manman Ren49d684e2012-09-12 05:06:18 +0000126#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Dan Gohman45774ce2010-02-12 10:34:29 +0000127void RegSortData::dump() const {
128 print(errs()); errs() << '\n';
129}
Manman Renc3366cc2012-09-06 19:55:56 +0000130#endif
Dan Gohman2a12ae72009-02-20 04:17:46 +0000131
Chris Lattner79a42ac2006-12-19 21:40:18 +0000132namespace {
Dale Johannesene3a02be2007-03-20 00:47:50 +0000133
Dan Gohman45774ce2010-02-12 10:34:29 +0000134/// RegUseTracker - Map register candidates to information about how they are
135/// used.
136class RegUseTracker {
137 typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
Dale Johannesene3a02be2007-03-20 00:47:50 +0000138
Dan Gohman248c41d2010-05-18 22:33:00 +0000139 RegUsesTy RegUsesMap;
Dan Gohman45774ce2010-02-12 10:34:29 +0000140 SmallVector<const SCEV *, 16> RegSequence;
Evan Cheng3df447d2006-03-16 21:53:05 +0000141
Dan Gohman45774ce2010-02-12 10:34:29 +0000142public:
143 void CountRegister(const SCEV *Reg, size_t LUIdx);
Dan Gohman4cf99b52010-05-18 23:42:37 +0000144 void DropRegister(const SCEV *Reg, size_t LUIdx);
Dan Gohmana7b68d62010-10-07 23:33:43 +0000145 void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
Dan Gohman51ad99d2010-01-21 02:09:26 +0000146
Dan Gohman45774ce2010-02-12 10:34:29 +0000147 bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
Dan Gohman51ad99d2010-01-21 02:09:26 +0000148
Dan Gohman45774ce2010-02-12 10:34:29 +0000149 const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
Dan Gohman51ad99d2010-01-21 02:09:26 +0000150
Dan Gohman45774ce2010-02-12 10:34:29 +0000151 void clear();
Dan Gohman51ad99d2010-01-21 02:09:26 +0000152
Dan Gohman45774ce2010-02-12 10:34:29 +0000153 typedef SmallVectorImpl<const SCEV *>::iterator iterator;
154 typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
155 iterator begin() { return RegSequence.begin(); }
156 iterator end() { return RegSequence.end(); }
157 const_iterator begin() const { return RegSequence.begin(); }
158 const_iterator end() const { return RegSequence.end(); }
159};
Dan Gohman51ad99d2010-01-21 02:09:26 +0000160
Dan Gohman51ad99d2010-01-21 02:09:26 +0000161}
162
Dan Gohman45774ce2010-02-12 10:34:29 +0000163void
164RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
165 std::pair<RegUsesTy::iterator, bool> Pair =
Dan Gohman248c41d2010-05-18 22:33:00 +0000166 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
Dan Gohman45774ce2010-02-12 10:34:29 +0000167 RegSortData &RSD = Pair.first->second;
168 if (Pair.second)
169 RegSequence.push_back(Reg);
170 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
171 RSD.UsedByIndices.set(LUIdx);
Dan Gohman51ad99d2010-01-21 02:09:26 +0000172}
173
Dan Gohman4cf99b52010-05-18 23:42:37 +0000174void
175RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
176 RegUsesTy::iterator It = RegUsesMap.find(Reg);
177 assert(It != RegUsesMap.end());
178 RegSortData &RSD = It->second;
179 assert(RSD.UsedByIndices.size() > LUIdx);
180 RSD.UsedByIndices.reset(LUIdx);
181}
182
Dan Gohman20fab452010-05-19 23:43:12 +0000183void
Dan Gohmana7b68d62010-10-07 23:33:43 +0000184RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
185 assert(LUIdx <= LastLUIdx);
186
187 // Update RegUses. The data structure is not optimized for this purpose;
188 // we must iterate through it and update each of the bit vectors.
Dan Gohman20fab452010-05-19 23:43:12 +0000189 for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
Dan Gohmana7b68d62010-10-07 23:33:43 +0000190 I != E; ++I) {
191 SmallBitVector &UsedByIndices = I->second.UsedByIndices;
192 if (LUIdx < UsedByIndices.size())
193 UsedByIndices[LUIdx] =
194 LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
195 UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
196 }
Dan Gohman20fab452010-05-19 23:43:12 +0000197}
198
Dan Gohman45774ce2010-02-12 10:34:29 +0000199bool
200RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
Dan Gohman4f13bbf2010-08-29 15:18:49 +0000201 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
202 if (I == RegUsesMap.end())
203 return false;
204 const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
Dan Gohman45774ce2010-02-12 10:34:29 +0000205 int i = UsedByIndices.find_first();
206 if (i == -1) return false;
207 if ((size_t)i != LUIdx) return true;
208 return UsedByIndices.find_next(i) != -1;
209}
Dan Gohman51ad99d2010-01-21 02:09:26 +0000210
Dan Gohman45774ce2010-02-12 10:34:29 +0000211const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
Dan Gohman248c41d2010-05-18 22:33:00 +0000212 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
213 assert(I != RegUsesMap.end() && "Unknown register!");
Dan Gohman45774ce2010-02-12 10:34:29 +0000214 return I->second.UsedByIndices;
215}
Dan Gohman51ad99d2010-01-21 02:09:26 +0000216
Dan Gohman45774ce2010-02-12 10:34:29 +0000217void RegUseTracker::clear() {
Dan Gohman248c41d2010-05-18 22:33:00 +0000218 RegUsesMap.clear();
Dan Gohman45774ce2010-02-12 10:34:29 +0000219 RegSequence.clear();
220}
Dan Gohman51ad99d2010-01-21 02:09:26 +0000221
Dan Gohman45774ce2010-02-12 10:34:29 +0000222namespace {
223
224/// Formula - This class holds information that describes a formula for
225/// computing satisfying a use. It may include broken-out immediates and scaled
226/// registers.
227struct Formula {
Chandler Carruth6e479322013-01-07 15:04:40 +0000228 /// Global base address used for complex addressing.
229 GlobalValue *BaseGV;
230
231 /// Base offset for complex addressing.
232 int64_t BaseOffset;
233
234 /// Whether any complex addressing has a base register.
235 bool HasBaseReg;
236
237 /// The scale of any complex addressing.
238 int64_t Scale;
Dan Gohman45774ce2010-02-12 10:34:29 +0000239
240 /// BaseRegs - The list of "base" registers for this use. When this is
Chandler Carruth6e479322013-01-07 15:04:40 +0000241 /// non-empty,
Preston Gurd25c3b6a2013-02-01 20:41:27 +0000242 SmallVector<const SCEV *, 4> BaseRegs;
Dan Gohman45774ce2010-02-12 10:34:29 +0000243
244 /// ScaledReg - The 'scaled' register for this use. This should be non-null
Chandler Carruth6e479322013-01-07 15:04:40 +0000245 /// when Scale is not zero.
Dan Gohman45774ce2010-02-12 10:34:29 +0000246 const SCEV *ScaledReg;
247
Dan Gohman6136e942011-05-03 00:46:49 +0000248 /// UnfoldedOffset - An additional constant offset which added near the
249 /// use. This requires a temporary register, but the offset itself can
250 /// live in an add immediate field rather than a register.
251 int64_t UnfoldedOffset;
252
Chandler Carruth6e479322013-01-07 15:04:40 +0000253 Formula()
254 : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
255 UnfoldedOffset(0) {}
Dan Gohman45774ce2010-02-12 10:34:29 +0000256
Dan Gohman20d9ce22010-11-17 21:41:58 +0000257 void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
Dan Gohman45774ce2010-02-12 10:34:29 +0000258
259 unsigned getNumRegs() const;
Chris Lattner229907c2011-07-18 04:54:35 +0000260 Type *getType() const;
Dan Gohman45774ce2010-02-12 10:34:29 +0000261
Dan Gohman80a96082010-05-20 15:17:54 +0000262 void DeleteBaseReg(const SCEV *&S);
263
Dan Gohman45774ce2010-02-12 10:34:29 +0000264 bool referencesReg(const SCEV *S) const;
265 bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
266 const RegUseTracker &RegUses) const;
267
268 void print(raw_ostream &OS) const;
269 void dump() const;
270};
271
272}
273
Dan Gohman8b0a4192010-03-01 17:49:51 +0000274/// DoInitialMatch - Recursion helper for InitialMatch.
Dan Gohman45774ce2010-02-12 10:34:29 +0000275static void DoInitialMatch(const SCEV *S, Loop *L,
276 SmallVectorImpl<const SCEV *> &Good,
277 SmallVectorImpl<const SCEV *> &Bad,
Dan Gohman20d9ce22010-11-17 21:41:58 +0000278 ScalarEvolution &SE) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000279 // Collect expressions which properly dominate the loop header.
Dan Gohman20d9ce22010-11-17 21:41:58 +0000280 if (SE.properlyDominates(S, L->getHeader())) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000281 Good.push_back(S);
282 return;
Dan Gohman51ad99d2010-01-21 02:09:26 +0000283 }
Dan Gohman45774ce2010-02-12 10:34:29 +0000284
285 // Look at add operands.
286 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
287 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
288 I != E; ++I)
Dan Gohman20d9ce22010-11-17 21:41:58 +0000289 DoInitialMatch(*I, L, Good, Bad, SE);
Dan Gohman45774ce2010-02-12 10:34:29 +0000290 return;
291 }
292
293 // Look at addrec operands.
294 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
295 if (!AR->getStart()->isZero()) {
Dan Gohman20d9ce22010-11-17 21:41:58 +0000296 DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
Dan Gohman1d2ded72010-05-03 22:09:21 +0000297 DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
Dan Gohman45774ce2010-02-12 10:34:29 +0000298 AR->getStepRecurrence(SE),
Andrew Trick8b55b732011-03-14 16:50:06 +0000299 // FIXME: AR->getNoWrapFlags()
300 AR->getLoop(), SCEV::FlagAnyWrap),
Dan Gohman20d9ce22010-11-17 21:41:58 +0000301 L, Good, Bad, SE);
Dan Gohman45774ce2010-02-12 10:34:29 +0000302 return;
303 }
304
305 // Handle a multiplication by -1 (negation) if it didn't fold.
306 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
307 if (Mul->getOperand(0)->isAllOnesValue()) {
308 SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
309 const SCEV *NewMul = SE.getMulExpr(Ops);
310
311 SmallVector<const SCEV *, 4> MyGood;
312 SmallVector<const SCEV *, 4> MyBad;
Dan Gohman20d9ce22010-11-17 21:41:58 +0000313 DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
Dan Gohman45774ce2010-02-12 10:34:29 +0000314 const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
315 SE.getEffectiveSCEVType(NewMul->getType())));
316 for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
317 E = MyGood.end(); I != E; ++I)
318 Good.push_back(SE.getMulExpr(NegOne, *I));
319 for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
320 E = MyBad.end(); I != E; ++I)
321 Bad.push_back(SE.getMulExpr(NegOne, *I));
322 return;
323 }
324
325 // Ok, we can't do anything interesting. Just stuff the whole thing into a
326 // register and hope for the best.
327 Bad.push_back(S);
328}
329
330/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
331/// attempting to keep all loop-invariant and loop-computable values in a
332/// single base register.
Dan Gohman20d9ce22010-11-17 21:41:58 +0000333void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000334 SmallVector<const SCEV *, 4> Good;
335 SmallVector<const SCEV *, 4> Bad;
Dan Gohman20d9ce22010-11-17 21:41:58 +0000336 DoInitialMatch(S, L, Good, Bad, SE);
Dan Gohman45774ce2010-02-12 10:34:29 +0000337 if (!Good.empty()) {
Dan Gohman9b5d0bb72010-04-08 23:36:27 +0000338 const SCEV *Sum = SE.getAddExpr(Good);
339 if (!Sum->isZero())
340 BaseRegs.push_back(Sum);
Chandler Carruth6e479322013-01-07 15:04:40 +0000341 HasBaseReg = true;
Dan Gohman45774ce2010-02-12 10:34:29 +0000342 }
343 if (!Bad.empty()) {
Dan Gohman9b5d0bb72010-04-08 23:36:27 +0000344 const SCEV *Sum = SE.getAddExpr(Bad);
345 if (!Sum->isZero())
346 BaseRegs.push_back(Sum);
Chandler Carruth6e479322013-01-07 15:04:40 +0000347 HasBaseReg = true;
Dan Gohman45774ce2010-02-12 10:34:29 +0000348 }
349}
350
351/// getNumRegs - Return the total number of register operands used by this
352/// formula. This does not include register uses implied by non-constant
353/// addrec strides.
354unsigned Formula::getNumRegs() const {
355 return !!ScaledReg + BaseRegs.size();
356}
357
358/// getType - Return the type of this formula, if it has one, or null
359/// otherwise. This type is meaningless except for the bit size.
Chris Lattner229907c2011-07-18 04:54:35 +0000360Type *Formula::getType() const {
Dan Gohman45774ce2010-02-12 10:34:29 +0000361 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
362 ScaledReg ? ScaledReg->getType() :
Chandler Carruth6e479322013-01-07 15:04:40 +0000363 BaseGV ? BaseGV->getType() :
Dan Gohman45774ce2010-02-12 10:34:29 +0000364 0;
365}
366
Dan Gohman80a96082010-05-20 15:17:54 +0000367/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
368void Formula::DeleteBaseReg(const SCEV *&S) {
369 if (&S != &BaseRegs.back())
370 std::swap(S, BaseRegs.back());
371 BaseRegs.pop_back();
372}
373
Dan Gohman45774ce2010-02-12 10:34:29 +0000374/// referencesReg - Test if this formula references the given register.
375bool Formula::referencesReg(const SCEV *S) const {
376 return S == ScaledReg ||
377 std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
378}
379
380/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
381/// which are used by uses other than the use with the given index.
382bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
383 const RegUseTracker &RegUses) const {
384 if (ScaledReg)
385 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
386 return true;
387 for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
388 E = BaseRegs.end(); I != E; ++I)
389 if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
390 return true;
391 return false;
392}
393
394void Formula::print(raw_ostream &OS) const {
395 bool First = true;
Chandler Carruth6e479322013-01-07 15:04:40 +0000396 if (BaseGV) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000397 if (!First) OS << " + "; else First = false;
Chandler Carruthd48cdbf2014-01-09 02:29:41 +0000398 BaseGV->printAsOperand(OS, /*PrintType=*/false);
Dan Gohman45774ce2010-02-12 10:34:29 +0000399 }
Chandler Carruth6e479322013-01-07 15:04:40 +0000400 if (BaseOffset != 0) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000401 if (!First) OS << " + "; else First = false;
Chandler Carruth6e479322013-01-07 15:04:40 +0000402 OS << BaseOffset;
Dan Gohman45774ce2010-02-12 10:34:29 +0000403 }
404 for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
405 E = BaseRegs.end(); I != E; ++I) {
406 if (!First) OS << " + "; else First = false;
407 OS << "reg(" << **I << ')';
408 }
Chandler Carruth6e479322013-01-07 15:04:40 +0000409 if (HasBaseReg && BaseRegs.empty()) {
Dan Gohman06ab08f2010-05-18 22:35:55 +0000410 if (!First) OS << " + "; else First = false;
411 OS << "**error: HasBaseReg**";
Chandler Carruth6e479322013-01-07 15:04:40 +0000412 } else if (!HasBaseReg && !BaseRegs.empty()) {
Dan Gohman06ab08f2010-05-18 22:35:55 +0000413 if (!First) OS << " + "; else First = false;
414 OS << "**error: !HasBaseReg**";
415 }
Chandler Carruth6e479322013-01-07 15:04:40 +0000416 if (Scale != 0) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000417 if (!First) OS << " + "; else First = false;
Chandler Carruth6e479322013-01-07 15:04:40 +0000418 OS << Scale << "*reg(";
Dan Gohman45774ce2010-02-12 10:34:29 +0000419 if (ScaledReg)
420 OS << *ScaledReg;
421 else
422 OS << "<unknown>";
423 OS << ')';
424 }
Dan Gohman6136e942011-05-03 00:46:49 +0000425 if (UnfoldedOffset != 0) {
Arnaud A. de Grandmaison75c9e6d2014-03-15 22:13:15 +0000426 if (!First) OS << " + ";
Dan Gohman6136e942011-05-03 00:46:49 +0000427 OS << "imm(" << UnfoldedOffset << ')';
428 }
Dan Gohman45774ce2010-02-12 10:34:29 +0000429}
430
Manman Ren49d684e2012-09-12 05:06:18 +0000431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Dan Gohman45774ce2010-02-12 10:34:29 +0000432void Formula::dump() const {
433 print(errs()); errs() << '\n';
434}
Manman Renc3366cc2012-09-06 19:55:56 +0000435#endif
Dan Gohman45774ce2010-02-12 10:34:29 +0000436
Dan Gohman85af2562010-02-19 19:32:49 +0000437/// isAddRecSExtable - Return true if the given addrec can be sign-extended
438/// without changing its value.
439static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
Chris Lattner229907c2011-07-18 04:54:35 +0000440 Type *WideTy =
Dan Gohmanab5fb7f2010-05-20 19:44:23 +0000441 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
Dan Gohman85af2562010-02-19 19:32:49 +0000442 return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
443}
444
445/// isAddSExtable - Return true if the given add can be sign-extended
446/// without changing its value.
447static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
Chris Lattner229907c2011-07-18 04:54:35 +0000448 Type *WideTy =
Dan Gohmanab5fb7f2010-05-20 19:44:23 +0000449 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
Dan Gohman85af2562010-02-19 19:32:49 +0000450 return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
451}
452
Dan Gohmanab542222010-06-24 16:45:11 +0000453/// isMulSExtable - Return true if the given mul can be sign-extended
Dan Gohman85af2562010-02-19 19:32:49 +0000454/// without changing its value.
Dan Gohmanab542222010-06-24 16:45:11 +0000455static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
Chris Lattner229907c2011-07-18 04:54:35 +0000456 Type *WideTy =
Dan Gohmanab542222010-06-24 16:45:11 +0000457 IntegerType::get(SE.getContext(),
458 SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
459 return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
Dan Gohman85af2562010-02-19 19:32:49 +0000460}
461
Dan Gohman4eebb942010-02-19 19:35:48 +0000462/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
463/// and if the remainder is known to be zero, or null otherwise. If
464/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
465/// to Y, ignoring that the multiplication may overflow, which is useful when
466/// the result will be used in a context where the most significant bits are
467/// ignored.
468static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
469 ScalarEvolution &SE,
470 bool IgnoreSignificantBits = false) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000471 // Handle the trivial case, which works for any SCEV type.
472 if (LHS == RHS)
Dan Gohman1d2ded72010-05-03 22:09:21 +0000473 return SE.getConstant(LHS->getType(), 1);
Dan Gohman45774ce2010-02-12 10:34:29 +0000474
Dan Gohman47ddf762010-06-24 16:51:25 +0000475 // Handle a few RHS special cases.
476 const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
477 if (RC) {
478 const APInt &RA = RC->getValue()->getValue();
479 // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
480 // some folding.
481 if (RA.isAllOnesValue())
482 return SE.getMulExpr(LHS, RC);
483 // Handle x /s 1 as x.
484 if (RA == 1)
485 return LHS;
486 }
Dan Gohman45774ce2010-02-12 10:34:29 +0000487
488 // Check for a division of a constant by a constant.
489 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000490 if (!RC)
491 return 0;
Dan Gohman47ddf762010-06-24 16:51:25 +0000492 const APInt &LA = C->getValue()->getValue();
493 const APInt &RA = RC->getValue()->getValue();
494 if (LA.srem(RA) != 0)
Dan Gohman45774ce2010-02-12 10:34:29 +0000495 return 0;
Dan Gohman47ddf762010-06-24 16:51:25 +0000496 return SE.getConstant(LA.sdiv(RA));
Dan Gohman45774ce2010-02-12 10:34:29 +0000497 }
498
Dan Gohman85af2562010-02-19 19:32:49 +0000499 // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
Dan Gohman45774ce2010-02-12 10:34:29 +0000500 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
Dan Gohman85af2562010-02-19 19:32:49 +0000501 if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
Dan Gohman4eebb942010-02-19 19:35:48 +0000502 const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
503 IgnoreSignificantBits);
Dan Gohman85af2562010-02-19 19:32:49 +0000504 if (!Step) return 0;
Dan Gohman129a8162010-08-19 01:02:31 +0000505 const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
506 IgnoreSignificantBits);
507 if (!Start) return 0;
Andrew Trick8b55b732011-03-14 16:50:06 +0000508 // FlagNW is independent of the start value, step direction, and is
509 // preserved with smaller magnitude steps.
510 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
511 return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
Dan Gohman85af2562010-02-19 19:32:49 +0000512 }
Dan Gohman963b1c12010-06-24 16:57:52 +0000513 return 0;
Dan Gohman45774ce2010-02-12 10:34:29 +0000514 }
515
Dan Gohman85af2562010-02-19 19:32:49 +0000516 // Distribute the sdiv over add operands, if the add doesn't overflow.
Dan Gohman45774ce2010-02-12 10:34:29 +0000517 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
Dan Gohman85af2562010-02-19 19:32:49 +0000518 if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
519 SmallVector<const SCEV *, 8> Ops;
520 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
521 I != E; ++I) {
Dan Gohman4eebb942010-02-19 19:35:48 +0000522 const SCEV *Op = getExactSDiv(*I, RHS, SE,
523 IgnoreSignificantBits);
Dan Gohman85af2562010-02-19 19:32:49 +0000524 if (!Op) return 0;
525 Ops.push_back(Op);
526 }
527 return SE.getAddExpr(Ops);
Dan Gohman45774ce2010-02-12 10:34:29 +0000528 }
Dan Gohman963b1c12010-06-24 16:57:52 +0000529 return 0;
Dan Gohman45774ce2010-02-12 10:34:29 +0000530 }
531
532 // Check for a multiply operand that we can pull RHS out of.
Dan Gohman963b1c12010-06-24 16:57:52 +0000533 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
Dan Gohman85af2562010-02-19 19:32:49 +0000534 if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000535 SmallVector<const SCEV *, 4> Ops;
536 bool Found = false;
537 for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
538 I != E; ++I) {
Dan Gohman6b733fc2010-05-20 16:23:28 +0000539 const SCEV *S = *I;
Dan Gohman45774ce2010-02-12 10:34:29 +0000540 if (!Found)
Dan Gohman6b733fc2010-05-20 16:23:28 +0000541 if (const SCEV *Q = getExactSDiv(S, RHS, SE,
Dan Gohman4eebb942010-02-19 19:35:48 +0000542 IgnoreSignificantBits)) {
Dan Gohman6b733fc2010-05-20 16:23:28 +0000543 S = Q;
Dan Gohman45774ce2010-02-12 10:34:29 +0000544 Found = true;
Dan Gohman45774ce2010-02-12 10:34:29 +0000545 }
Dan Gohman6b733fc2010-05-20 16:23:28 +0000546 Ops.push_back(S);
Dan Gohman45774ce2010-02-12 10:34:29 +0000547 }
548 return Found ? SE.getMulExpr(Ops) : 0;
549 }
Dan Gohman963b1c12010-06-24 16:57:52 +0000550 return 0;
551 }
Dan Gohman45774ce2010-02-12 10:34:29 +0000552
553 // Otherwise we don't know.
554 return 0;
555}
556
557/// ExtractImmediate - If S involves the addition of a constant integer value,
558/// return that integer value, and mutate S to point to a new SCEV with that
559/// value excluded.
560static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
561 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
562 if (C->getValue()->getValue().getMinSignedBits() <= 64) {
Dan Gohman1d2ded72010-05-03 22:09:21 +0000563 S = SE.getConstant(C->getType(), 0);
Dan Gohman45774ce2010-02-12 10:34:29 +0000564 return C->getValue()->getSExtValue();
565 }
566 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
567 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
568 int64_t Result = ExtractImmediate(NewOps.front(), SE);
Dan Gohman081ffcd2010-08-13 21:17:19 +0000569 if (Result != 0)
570 S = SE.getAddExpr(NewOps);
Dan Gohman45774ce2010-02-12 10:34:29 +0000571 return Result;
572 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
573 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
574 int64_t Result = ExtractImmediate(NewOps.front(), SE);
Dan Gohman081ffcd2010-08-13 21:17:19 +0000575 if (Result != 0)
Andrew Trick8b55b732011-03-14 16:50:06 +0000576 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
577 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
578 SCEV::FlagAnyWrap);
Dan Gohman45774ce2010-02-12 10:34:29 +0000579 return Result;
580 }
581 return 0;
582}
583
584/// ExtractSymbol - If S involves the addition of a GlobalValue address,
585/// return that symbol, and mutate S to point to a new SCEV with that
586/// value excluded.
587static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
588 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
589 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
Dan Gohman1d2ded72010-05-03 22:09:21 +0000590 S = SE.getConstant(GV->getType(), 0);
Dan Gohman45774ce2010-02-12 10:34:29 +0000591 return GV;
592 }
593 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
594 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
595 GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
Dan Gohman081ffcd2010-08-13 21:17:19 +0000596 if (Result)
597 S = SE.getAddExpr(NewOps);
Dan Gohman45774ce2010-02-12 10:34:29 +0000598 return Result;
599 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
600 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
601 GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
Dan Gohman081ffcd2010-08-13 21:17:19 +0000602 if (Result)
Andrew Trick8b55b732011-03-14 16:50:06 +0000603 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
604 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
605 SCEV::FlagAnyWrap);
Dan Gohman45774ce2010-02-12 10:34:29 +0000606 return Result;
607 }
608 return 0;
Nate Begemanb18121e2004-10-18 21:08:22 +0000609}
610
Dan Gohmand0b1fbd2009-02-18 00:08:39 +0000611/// isAddressUse - Returns true if the specified instruction is using the
Dale Johannesen9efd2ce2008-12-05 21:47:27 +0000612/// specified value as an address.
613static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
614 bool isAddress = isa<LoadInst>(Inst);
615 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
616 if (SI->getOperand(1) == OperandVal)
617 isAddress = true;
618 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
619 // Addressing modes can also be folded into prefetches and a variety
620 // of intrinsics.
621 switch (II->getIntrinsicID()) {
622 default: break;
623 case Intrinsic::prefetch:
Dale Johannesen9efd2ce2008-12-05 21:47:27 +0000624 case Intrinsic::x86_sse_storeu_ps:
625 case Intrinsic::x86_sse2_storeu_pd:
626 case Intrinsic::x86_sse2_storeu_dq:
627 case Intrinsic::x86_sse2_storel_dq:
Gabor Greif8ae30952010-06-30 09:15:28 +0000628 if (II->getArgOperand(0) == OperandVal)
Dale Johannesen9efd2ce2008-12-05 21:47:27 +0000629 isAddress = true;
630 break;
631 }
632 }
633 return isAddress;
634}
Chris Lattnere4ed42a2005-10-03 01:04:44 +0000635
Dan Gohman917ffe42009-03-09 21:01:17 +0000636/// getAccessType - Return the type of the memory being accessed.
Chris Lattner229907c2011-07-18 04:54:35 +0000637static Type *getAccessType(const Instruction *Inst) {
638 Type *AccessTy = Inst->getType();
Dan Gohman917ffe42009-03-09 21:01:17 +0000639 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
Dan Gohman14d13392009-05-18 16:45:28 +0000640 AccessTy = SI->getOperand(0)->getType();
Dan Gohman917ffe42009-03-09 21:01:17 +0000641 else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
642 // Addressing modes can also be folded into prefetches and a variety
643 // of intrinsics.
644 switch (II->getIntrinsicID()) {
645 default: break;
646 case Intrinsic::x86_sse_storeu_ps:
647 case Intrinsic::x86_sse2_storeu_pd:
648 case Intrinsic::x86_sse2_storeu_dq:
649 case Intrinsic::x86_sse2_storel_dq:
Gabor Greif8ae30952010-06-30 09:15:28 +0000650 AccessTy = II->getArgOperand(0)->getType();
Dan Gohman917ffe42009-03-09 21:01:17 +0000651 break;
652 }
653 }
Dan Gohman45774ce2010-02-12 10:34:29 +0000654
655 // All pointers have the same requirements, so canonicalize them to an
656 // arbitrary pointer type to minimize variation.
Chris Lattner229907c2011-07-18 04:54:35 +0000657 if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
Dan Gohman45774ce2010-02-12 10:34:29 +0000658 AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
659 PTy->getAddressSpace());
660
Dan Gohman14d13392009-05-18 16:45:28 +0000661 return AccessTy;
Dan Gohman917ffe42009-03-09 21:01:17 +0000662}
663
Andrew Trick5df90962011-12-06 03:13:31 +0000664/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
665static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
666 for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
667 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
668 if (SE.isSCEVable(PN->getType()) &&
669 (SE.getEffectiveSCEVType(PN->getType()) ==
670 SE.getEffectiveSCEVType(AR->getType())) &&
671 SE.getSCEV(PN) == AR)
672 return true;
673 }
674 return false;
675}
676
Andrew Trickd5d2db92012-01-10 01:45:08 +0000677/// Check if expanding this expression is likely to incur significant cost. This
678/// is tricky because SCEV doesn't track which expressions are actually computed
679/// by the current IR.
680///
681/// We currently allow expansion of IV increments that involve adds,
682/// multiplication by constants, and AddRecs from existing phis.
683///
684/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
685/// obvious multiple of the UDivExpr.
686static bool isHighCostExpansion(const SCEV *S,
687 SmallPtrSet<const SCEV*, 8> &Processed,
688 ScalarEvolution &SE) {
689 // Zero/One operand expressions
690 switch (S->getSCEVType()) {
691 case scUnknown:
692 case scConstant:
693 return false;
694 case scTruncate:
695 return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
696 Processed, SE);
697 case scZeroExtend:
698 return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
699 Processed, SE);
700 case scSignExtend:
701 return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
702 Processed, SE);
703 }
704
705 if (!Processed.insert(S))
706 return false;
707
708 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
709 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
710 I != E; ++I) {
711 if (isHighCostExpansion(*I, Processed, SE))
712 return true;
713 }
714 return false;
715 }
716
717 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
718 if (Mul->getNumOperands() == 2) {
719 // Multiplication by a constant is ok
720 if (isa<SCEVConstant>(Mul->getOperand(0)))
721 return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
722
723 // If we have the value of one operand, check if an existing
724 // multiplication already generates this expression.
725 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
726 Value *UVal = U->getValue();
Chandler Carruthcdf47882014-03-09 03:16:01 +0000727 for (User *UR : UVal->users()) {
Andrew Trick14779cc2012-03-26 20:28:37 +0000728 // If U is a constant, it may be used by a ConstantExpr.
Chandler Carruthcdf47882014-03-09 03:16:01 +0000729 Instruction *UI = dyn_cast<Instruction>(UR);
730 if (UI && UI->getOpcode() == Instruction::Mul &&
731 SE.isSCEVable(UI->getType())) {
732 return SE.getSCEV(UI) == Mul;
Andrew Trickd5d2db92012-01-10 01:45:08 +0000733 }
734 }
735 }
736 }
737 }
738
739 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
740 if (isExistingPhi(AR, SE))
741 return false;
742 }
743
744 // Fow now, consider any other type of expression (div/mul/min/max) high cost.
745 return true;
746}
747
Dan Gohman45774ce2010-02-12 10:34:29 +0000748/// DeleteTriviallyDeadInstructions - If any of the instructions is the
749/// specified set are trivially dead, delete them and see if this makes any of
750/// their operands subsequently dead.
751static bool
752DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
753 bool Changed = false;
754
755 while (!DeadInsts.empty()) {
Richard Smithad9c8e82012-08-21 20:35:14 +0000756 Value *V = DeadInsts.pop_back_val();
757 Instruction *I = dyn_cast_or_null<Instruction>(V);
Dan Gohman45774ce2010-02-12 10:34:29 +0000758
759 if (I == 0 || !isInstructionTriviallyDead(I))
760 continue;
761
762 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
763 if (Instruction *U = dyn_cast<Instruction>(*OI)) {
764 *OI = 0;
765 if (U->use_empty())
766 DeadInsts.push_back(U);
767 }
768
769 I->eraseFromParent();
770 Changed = true;
771 }
772
773 return Changed;
774}
775
Dan Gohman045f8192010-01-22 00:46:49 +0000776namespace {
Quentin Colombet8aa7abe2013-05-31 17:20:29 +0000777class LSRUse;
778}
779// Check if it is legal to fold 2 base registers.
780static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
781 const Formula &F);
Quentin Colombetbf490d42013-05-31 21:29:03 +0000782// Get the cost of the scaling factor used in F for LU.
783static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
784 const LSRUse &LU, const Formula &F);
Quentin Colombet8aa7abe2013-05-31 17:20:29 +0000785
786namespace {
Jim Grosbach60f48542009-11-17 17:53:56 +0000787
Dan Gohman45774ce2010-02-12 10:34:29 +0000788/// Cost - This class is used to measure and compare candidate formulae.
789class Cost {
790 /// TODO: Some of these could be merged. Also, a lexical ordering
791 /// isn't always optimal.
792 unsigned NumRegs;
793 unsigned AddRecCost;
794 unsigned NumIVMuls;
795 unsigned NumBaseAdds;
796 unsigned ImmCost;
797 unsigned SetupCost;
Quentin Colombetbf490d42013-05-31 21:29:03 +0000798 unsigned ScaleCost;
Nate Begemane68bcd12005-07-30 00:15:07 +0000799
Dan Gohman45774ce2010-02-12 10:34:29 +0000800public:
801 Cost()
802 : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
Quentin Colombetbf490d42013-05-31 21:29:03 +0000803 SetupCost(0), ScaleCost(0) {}
Jim Grosbach60f48542009-11-17 17:53:56 +0000804
Dan Gohman45774ce2010-02-12 10:34:29 +0000805 bool operator<(const Cost &Other) const;
Dan Gohman045f8192010-01-22 00:46:49 +0000806
Tim Northoverbc6659c2014-01-22 13:27:00 +0000807 void Lose();
Dan Gohman045f8192010-01-22 00:46:49 +0000808
Andrew Trick784729d2011-09-26 23:11:04 +0000809#ifndef NDEBUG
810 // Once any of the metrics loses, they must all remain losers.
811 bool isValid() {
812 return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
Quentin Colombetbf490d42013-05-31 21:29:03 +0000813 | ImmCost | SetupCost | ScaleCost) != ~0u)
Andrew Trick784729d2011-09-26 23:11:04 +0000814 || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
Quentin Colombetbf490d42013-05-31 21:29:03 +0000815 & ImmCost & SetupCost & ScaleCost) == ~0u);
Andrew Trick784729d2011-09-26 23:11:04 +0000816 }
817#endif
818
819 bool isLoser() {
820 assert(isValid() && "invalid cost");
821 return NumRegs == ~0u;
822 }
823
Quentin Colombet8aa7abe2013-05-31 17:20:29 +0000824 void RateFormula(const TargetTransformInfo &TTI,
825 const Formula &F,
Dan Gohman45774ce2010-02-12 10:34:29 +0000826 SmallPtrSet<const SCEV *, 16> &Regs,
827 const DenseSet<const SCEV *> &VisitedRegs,
828 const Loop *L,
829 const SmallVectorImpl<int64_t> &Offsets,
Andrew Trick5df90962011-12-06 03:13:31 +0000830 ScalarEvolution &SE, DominatorTree &DT,
Quentin Colombet8aa7abe2013-05-31 17:20:29 +0000831 const LSRUse &LU,
Andrew Trick5df90962011-12-06 03:13:31 +0000832 SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
Dan Gohman045f8192010-01-22 00:46:49 +0000833
Dan Gohman45774ce2010-02-12 10:34:29 +0000834 void print(raw_ostream &OS) const;
835 void dump() const;
Dan Gohman045f8192010-01-22 00:46:49 +0000836
Dan Gohman45774ce2010-02-12 10:34:29 +0000837private:
838 void RateRegister(const SCEV *Reg,
839 SmallPtrSet<const SCEV *, 16> &Regs,
840 const Loop *L,
841 ScalarEvolution &SE, DominatorTree &DT);
Dan Gohman5b18f032010-02-13 02:06:02 +0000842 void RatePrimaryRegister(const SCEV *Reg,
843 SmallPtrSet<const SCEV *, 16> &Regs,
844 const Loop *L,
Andrew Trick5df90962011-12-06 03:13:31 +0000845 ScalarEvolution &SE, DominatorTree &DT,
846 SmallPtrSet<const SCEV *, 16> *LoserRegs);
Dan Gohman45774ce2010-02-12 10:34:29 +0000847};
848
849}
850
851/// RateRegister - Tally up interesting quantities from the given register.
852void Cost::RateRegister(const SCEV *Reg,
853 SmallPtrSet<const SCEV *, 16> &Regs,
854 const Loop *L,
855 ScalarEvolution &SE, DominatorTree &DT) {
Dan Gohman5b18f032010-02-13 02:06:02 +0000856 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
Andrew Trickbc6de902011-09-29 01:33:38 +0000857 // If this is an addrec for another loop, don't second-guess its addrec phi
858 // nodes. LSR isn't currently smart enough to reason about more than one
Andrew Trickd97b83e2012-03-22 22:42:45 +0000859 // loop at a time. LSR has already run on inner loops, will not run on outer
860 // loops, and cannot be expected to change sibling loops.
861 if (AR->getLoop() != L) {
862 // If the AddRec exists, consider it's register free and leave it alone.
Andrew Trick5df90962011-12-06 03:13:31 +0000863 if (isExistingPhi(AR, SE))
864 return;
865
Andrew Trickd97b83e2012-03-22 22:42:45 +0000866 // Otherwise, do not consider this formula at all.
Tim Northoverbc6659c2014-01-22 13:27:00 +0000867 Lose();
Andrew Trickd97b83e2012-03-22 22:42:45 +0000868 return;
Dan Gohman45774ce2010-02-12 10:34:29 +0000869 }
Andrew Trickd97b83e2012-03-22 22:42:45 +0000870 AddRecCost += 1; /// TODO: This should be a function of the stride.
Dan Gohman45774ce2010-02-12 10:34:29 +0000871
Dan Gohman5b18f032010-02-13 02:06:02 +0000872 // Add the step value register, if it needs one.
873 // TODO: The non-affine case isn't precisely modeled here.
Andrew Trick8868fae2011-09-26 23:35:25 +0000874 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
875 if (!Regs.count(AR->getOperand(1))) {
Dan Gohman5b18f032010-02-13 02:06:02 +0000876 RateRegister(AR->getOperand(1), Regs, L, SE, DT);
Andrew Trick8868fae2011-09-26 23:35:25 +0000877 if (isLoser())
878 return;
879 }
880 }
Dan Gohman45774ce2010-02-12 10:34:29 +0000881 }
Dan Gohman5b18f032010-02-13 02:06:02 +0000882 ++NumRegs;
883
884 // Rough heuristic; favor registers which don't require extra setup
885 // instructions in the preheader.
886 if (!isa<SCEVUnknown>(Reg) &&
887 !isa<SCEVConstant>(Reg) &&
888 !(isa<SCEVAddRecExpr>(Reg) &&
889 (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
890 isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
891 ++SetupCost;
Dan Gohman34f37e02010-10-07 23:41:58 +0000892
893 NumIVMuls += isa<SCEVMulExpr>(Reg) &&
Dan Gohmanafd6db92010-11-17 21:23:15 +0000894 SE.hasComputableLoopEvolution(Reg, L);
Dan Gohman5b18f032010-02-13 02:06:02 +0000895}
896
897/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
Andrew Trick5df90962011-12-06 03:13:31 +0000898/// before, rate it. Optional LoserRegs provides a way to declare any formula
899/// that refers to one of those regs an instant loser.
Dan Gohman5b18f032010-02-13 02:06:02 +0000900void Cost::RatePrimaryRegister(const SCEV *Reg,
Dan Gohman0849ed52010-02-16 19:42:34 +0000901 SmallPtrSet<const SCEV *, 16> &Regs,
902 const Loop *L,
Andrew Trick5df90962011-12-06 03:13:31 +0000903 ScalarEvolution &SE, DominatorTree &DT,
904 SmallPtrSet<const SCEV *, 16> *LoserRegs) {
905 if (LoserRegs && LoserRegs->count(Reg)) {
Tim Northoverbc6659c2014-01-22 13:27:00 +0000906 Lose();
Andrew Trick5df90962011-12-06 03:13:31 +0000907 return;
908 }
909 if (Regs.insert(Reg)) {
Dan Gohman5b18f032010-02-13 02:06:02 +0000910 RateRegister(Reg, Regs, L, SE, DT);
Andrew Tricka1c01ba2013-03-19 04:14:57 +0000911 if (LoserRegs && isLoser())
Andrew Trick5df90962011-12-06 03:13:31 +0000912 LoserRegs->insert(Reg);
913 }
Dan Gohman45774ce2010-02-12 10:34:29 +0000914}
915
Quentin Colombet8aa7abe2013-05-31 17:20:29 +0000916void Cost::RateFormula(const TargetTransformInfo &TTI,
917 const Formula &F,
Dan Gohman45774ce2010-02-12 10:34:29 +0000918 SmallPtrSet<const SCEV *, 16> &Regs,
919 const DenseSet<const SCEV *> &VisitedRegs,
920 const Loop *L,
921 const SmallVectorImpl<int64_t> &Offsets,
Andrew Trick5df90962011-12-06 03:13:31 +0000922 ScalarEvolution &SE, DominatorTree &DT,
Quentin Colombet8aa7abe2013-05-31 17:20:29 +0000923 const LSRUse &LU,
Andrew Trick5df90962011-12-06 03:13:31 +0000924 SmallPtrSet<const SCEV *, 16> *LoserRegs) {
Dan Gohman45774ce2010-02-12 10:34:29 +0000925 // Tally up the registers.
926 if (const SCEV *ScaledReg = F.ScaledReg) {
927 if (VisitedRegs.count(ScaledReg)) {
Tim Northoverbc6659c2014-01-22 13:27:00 +0000928 Lose();
Dan Gohman45774ce2010-02-12 10:34:29 +0000929 return;
930 }
Andrew Trick5df90962011-12-06 03:13:31 +0000931 RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
Andrew Trick784729d2011-09-26 23:11:04 +0000932 if (isLoser())
933 return;
Dan Gohman45774ce2010-02-12 10:34:29 +0000934 }
935 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
936 E = F.BaseRegs.end(); I != E; ++I) {
937 const SCEV *BaseReg = *I;
938 if (VisitedRegs.count(BaseReg)) {
Tim Northoverbc6659c2014-01-22 13:27:00 +0000939 Lose();
Dan Gohman45774ce2010-02-12 10:34:29 +0000940 return;
941 }
Andrew Trick5df90962011-12-06 03:13:31 +0000942 RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
Andrew Trick784729d2011-09-26 23:11:04 +0000943 if (isLoser())
944 return;
Dan Gohman45774ce2010-02-12 10:34:29 +0000945 }
946
Dan Gohman6136e942011-05-03 00:46:49 +0000947 // Determine how many (unfolded) adds we'll need inside the loop.
948 size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
949 if (NumBaseParts > 1)
Quentin Colombet8aa7abe2013-05-31 17:20:29 +0000950 // Do not count the base and a possible second register if the target
951 // allows to fold 2 registers.
952 NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
Dan Gohman45774ce2010-02-12 10:34:29 +0000953
Quentin Colombetbf490d42013-05-31 21:29:03 +0000954 // Accumulate non-free scaling amounts.
955 ScaleCost += getScalingFactorCost(TTI, LU, F);
956
Dan Gohman45774ce2010-02-12 10:34:29 +0000957 // Tally up the non-zero immediates.
958 for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
959 E = Offsets.end(); I != E; ++I) {
Chandler Carruth6e479322013-01-07 15:04:40 +0000960 int64_t Offset = (uint64_t)*I + F.BaseOffset;
961 if (F.BaseGV)
Dan Gohman45774ce2010-02-12 10:34:29 +0000962 ImmCost += 64; // Handle symbolic values conservatively.
963 // TODO: This should probably be the pointer size.
964 else if (Offset != 0)
965 ImmCost += APInt(64, Offset, true).getMinSignedBits();
966 }
Andrew Trick784729d2011-09-26 23:11:04 +0000967 assert(isValid() && "invalid cost");
Dan Gohman45774ce2010-02-12 10:34:29 +0000968}
969
Tim Northoverbc6659c2014-01-22 13:27:00 +0000970/// Lose - Set this cost to a losing value.
971void Cost::Lose() {
Dan Gohman45774ce2010-02-12 10:34:29 +0000972 NumRegs = ~0u;
973 AddRecCost = ~0u;
974 NumIVMuls = ~0u;
975 NumBaseAdds = ~0u;
976 ImmCost = ~0u;
977 SetupCost = ~0u;
Quentin Colombetbf490d42013-05-31 21:29:03 +0000978 ScaleCost = ~0u;
Dan Gohman45774ce2010-02-12 10:34:29 +0000979}
980
981/// operator< - Choose the lower cost.
982bool Cost::operator<(const Cost &Other) const {
Benjamin Kramerb2f034b2014-03-03 19:58:30 +0000983 return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
984 ImmCost, SetupCost) <
985 std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
986 Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
987 Other.SetupCost);
Dan Gohman45774ce2010-02-12 10:34:29 +0000988}
989
990void Cost::print(raw_ostream &OS) const {
991 OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
992 if (AddRecCost != 0)
993 OS << ", with addrec cost " << AddRecCost;
994 if (NumIVMuls != 0)
995 OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
996 if (NumBaseAdds != 0)
997 OS << ", plus " << NumBaseAdds << " base add"
998 << (NumBaseAdds == 1 ? "" : "s");
Quentin Colombetbf490d42013-05-31 21:29:03 +0000999 if (ScaleCost != 0)
1000 OS << ", plus " << ScaleCost << " scale cost";
Dan Gohman45774ce2010-02-12 10:34:29 +00001001 if (ImmCost != 0)
1002 OS << ", plus " << ImmCost << " imm cost";
1003 if (SetupCost != 0)
1004 OS << ", plus " << SetupCost << " setup cost";
1005}
1006
Manman Ren49d684e2012-09-12 05:06:18 +00001007#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Dan Gohman45774ce2010-02-12 10:34:29 +00001008void Cost::dump() const {
1009 print(errs()); errs() << '\n';
1010}
Manman Renc3366cc2012-09-06 19:55:56 +00001011#endif
Dan Gohman45774ce2010-02-12 10:34:29 +00001012
1013namespace {
1014
1015/// LSRFixup - An operand value in an instruction which is to be replaced
1016/// with some equivalent, possibly strength-reduced, replacement.
1017struct LSRFixup {
1018 /// UserInst - The instruction which will be updated.
1019 Instruction *UserInst;
1020
1021 /// OperandValToReplace - The operand of the instruction which will
1022 /// be replaced. The operand may be used more than once; every instance
1023 /// will be replaced.
1024 Value *OperandValToReplace;
1025
Dan Gohmand006ab92010-04-07 22:27:08 +00001026 /// PostIncLoops - If this user is to use the post-incremented value of an
Dan Gohman45774ce2010-02-12 10:34:29 +00001027 /// induction variable, this variable is non-null and holds the loop
1028 /// associated with the induction variable.
Dan Gohmand006ab92010-04-07 22:27:08 +00001029 PostIncLoopSet PostIncLoops;
Dan Gohman45774ce2010-02-12 10:34:29 +00001030
1031 /// LUIdx - The index of the LSRUse describing the expression which
1032 /// this fixup needs, minus an offset (below).
1033 size_t LUIdx;
1034
1035 /// Offset - A constant offset to be added to the LSRUse expression.
1036 /// This allows multiple fixups to share the same LSRUse with different
1037 /// offsets, for example in an unrolled loop.
1038 int64_t Offset;
1039
Dan Gohmand006ab92010-04-07 22:27:08 +00001040 bool isUseFullyOutsideLoop(const Loop *L) const;
1041
Dan Gohman45774ce2010-02-12 10:34:29 +00001042 LSRFixup();
1043
1044 void print(raw_ostream &OS) const;
1045 void dump() const;
1046};
1047
1048}
1049
1050LSRFixup::LSRFixup()
Dan Gohmanab5fb7f2010-05-20 19:44:23 +00001051 : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
Dan Gohman45774ce2010-02-12 10:34:29 +00001052
Dan Gohmand006ab92010-04-07 22:27:08 +00001053/// isUseFullyOutsideLoop - Test whether this fixup always uses its
1054/// value outside of the given loop.
1055bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1056 // PHI nodes use their value in their incoming blocks.
1057 if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1058 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1059 if (PN->getIncomingValue(i) == OperandValToReplace &&
1060 L->contains(PN->getIncomingBlock(i)))
1061 return false;
1062 return true;
1063 }
1064
1065 return !L->contains(UserInst);
1066}
1067
Dan Gohman45774ce2010-02-12 10:34:29 +00001068void LSRFixup::print(raw_ostream &OS) const {
1069 OS << "UserInst=";
1070 // Store is common and interesting enough to be worth special-casing.
1071 if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1072 OS << "store ";
Chandler Carruthd48cdbf2014-01-09 02:29:41 +00001073 Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
Dan Gohman45774ce2010-02-12 10:34:29 +00001074 } else if (UserInst->getType()->isVoidTy())
1075 OS << UserInst->getOpcodeName();
1076 else
Chandler Carruthd48cdbf2014-01-09 02:29:41 +00001077 UserInst->printAsOperand(OS, /*PrintType=*/false);
Dan Gohman45774ce2010-02-12 10:34:29 +00001078
1079 OS << ", OperandValToReplace=";
Chandler Carruthd48cdbf2014-01-09 02:29:41 +00001080 OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
Dan Gohman45774ce2010-02-12 10:34:29 +00001081
Dan Gohmand006ab92010-04-07 22:27:08 +00001082 for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
1083 E = PostIncLoops.end(); I != E; ++I) {
Dan Gohman45774ce2010-02-12 10:34:29 +00001084 OS << ", PostIncLoop=";
Chandler Carruthd48cdbf2014-01-09 02:29:41 +00001085 (*I)->getHeader()->printAsOperand(OS, /*PrintType=*/false);
Dan Gohman45774ce2010-02-12 10:34:29 +00001086 }
1087
1088 if (LUIdx != ~size_t(0))
1089 OS << ", LUIdx=" << LUIdx;
1090
1091 if (Offset != 0)
1092 OS << ", Offset=" << Offset;
1093}
1094
Manman Ren49d684e2012-09-12 05:06:18 +00001095#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Dan Gohman45774ce2010-02-12 10:34:29 +00001096void LSRFixup::dump() const {
1097 print(errs()); errs() << '\n';
1098}
Manman Renc3366cc2012-09-06 19:55:56 +00001099#endif
Dan Gohman45774ce2010-02-12 10:34:29 +00001100
1101namespace {
1102
1103/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
1104/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
1105struct UniquifierDenseMapInfo {
Preston Gurd25c3b6a2013-02-01 20:41:27 +00001106 static SmallVector<const SCEV *, 4> getEmptyKey() {
1107 SmallVector<const SCEV *, 4> V;
Dan Gohman45774ce2010-02-12 10:34:29 +00001108 V.push_back(reinterpret_cast<const SCEV *>(-1));
1109 return V;
1110 }
1111
Preston Gurd25c3b6a2013-02-01 20:41:27 +00001112 static SmallVector<const SCEV *, 4> getTombstoneKey() {
1113 SmallVector<const SCEV *, 4> V;
Dan Gohman45774ce2010-02-12 10:34:29 +00001114 V.push_back(reinterpret_cast<const SCEV *>(-2));
1115 return V;
1116 }
1117
Preston Gurd25c3b6a2013-02-01 20:41:27 +00001118 static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
Benjamin Kramer62fb0cf2014-03-15 17:17:48 +00001119 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
Dan Gohman45774ce2010-02-12 10:34:29 +00001120 }
1121
Preston Gurd25c3b6a2013-02-01 20:41:27 +00001122 static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
1123 const SmallVector<const SCEV *, 4> &RHS) {
Dan Gohman45774ce2010-02-12 10:34:29 +00001124 return LHS == RHS;
1125 }
1126};
1127
1128/// LSRUse - This class holds the state that LSR keeps for each use in
1129/// IVUsers, as well as uses invented by LSR itself. It includes information
1130/// about what kinds of things can be folded into the user, information about
1131/// the user itself, and information about how the use may be satisfied.
1132/// TODO: Represent multiple users of the same expression in common?
1133class LSRUse {
Preston Gurd25c3b6a2013-02-01 20:41:27 +00001134 DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
Dan Gohman45774ce2010-02-12 10:34:29 +00001135
1136public:
1137 /// KindType - An enum for a kind of use, indicating what types of
1138 /// scaled and immediate operands it might support.
1139 enum KindType {
1140 Basic, ///< A normal use, with no folding.
1141 Special, ///< A special case of basic, allowing -1 scales.
Nadav Rotem4dc976f2012-10-19 21:28:43 +00001142 Address, ///< An address use; folding according to TargetLowering
Dan Gohman45774ce2010-02-12 10:34:29 +00001143 ICmpZero ///< An equality icmp with both operands folded into one.
1144 // TODO: Add a generic icmp too?
Dan Gohman045f8192010-01-22 00:46:49 +00001145 };
Dan Gohman45774ce2010-02-12 10:34:29 +00001146
Benjamin Kramer62fb0cf2014-03-15 17:17:48 +00001147 typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
1148
Dan Gohman45774ce2010-02-12 10:34:29 +00001149 KindType Kind;
Chris Lattner229907c2011-07-18 04:54:35 +00001150 Type *AccessTy;
Dan Gohman45774ce2010-02-12 10:34:29 +00001151
1152 SmallVector<int64_t, 8> Offsets;
1153 int64_t MinOffset;
1154 int64_t MaxOffset;
1155
1156 /// AllFixupsOutsideLoop - This records whether all of the fixups using this
1157 /// LSRUse are outside of the loop, in which case some special-case heuristics
1158 /// may be used.
1159 bool AllFixupsOutsideLoop;
1160
Andrew Trick57243da2013-10-25 21:35:56 +00001161 /// RigidFormula is set to true to guarantee that this use will be associated
1162 /// with a single formula--the one that initially matched. Some SCEV
1163 /// expressions cannot be expanded. This allows LSR to consider the registers
1164 /// used by those expressions without the need to expand them later after
1165 /// changing the formula.
1166 bool RigidFormula;
1167
Dan Gohman14152082010-07-15 20:24:58 +00001168 /// WidestFixupType - This records the widest use type for any fixup using
1169 /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
1170 /// max fixup widths to be equivalent, because the narrower one may be relying
1171 /// on the implicit truncation to truncate away bogus bits.
Chris Lattner229907c2011-07-18 04:54:35 +00001172 Type *WidestFixupType;
Dan Gohman14152082010-07-15 20:24:58 +00001173
Dan Gohman45774ce2010-02-12 10:34:29 +00001174 /// Formulae - A list of ways to build a value that can satisfy this user.
1175 /// After the list is populated, one of these is selected heuristically and
1176 /// used to formulate a replacement for OperandValToReplace in UserInst.
1177 SmallVector<Formula, 12> Formulae;
1178
1179 /// Regs - The set of register candidates used by all formulae in this LSRUse.
1180 SmallPtrSet<const SCEV *, 4> Regs;
1181
Chris Lattner229907c2011-07-18 04:54:35 +00001182 LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
Dan Gohman45774ce2010-02-12 10:34:29 +00001183 MinOffset(INT64_MAX),
1184 MaxOffset(INT64_MIN),
Dan Gohman14152082010-07-15 20:24:58 +00001185 AllFixupsOutsideLoop(true),
Andrew Trick57243da2013-10-25 21:35:56 +00001186 RigidFormula(false),
Dan Gohman14152082010-07-15 20:24:58 +00001187 WidestFixupType(0) {}
Dan Gohman45774ce2010-02-12 10:34:29 +00001188
Dan Gohman20fab452010-05-19 23:43:12 +00001189 bool HasFormulaWithSameRegs(const Formula &F) const;
Dan Gohman8c16b382010-02-22 04:11:59 +00001190 bool InsertFormula(const Formula &F);
Dan Gohmanf1c7b1b2010-05-18 22:39:15 +00001191 void DeleteFormula(Formula &F);
Dan Gohman4cf99b52010-05-18 23:42:37 +00001192 void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
Dan Gohman45774ce2010-02-12 10:34:29 +00001193
Dan Gohman45774ce2010-02-12 10:34:29 +00001194 void print(raw_ostream &OS) const;
1195 void dump() const;
1196};
1197
Dan Gohman297fb8b2010-06-19 21:21:39 +00001198}
1199
Dan Gohman20fab452010-05-19 23:43:12 +00001200/// HasFormula - Test whether this use as a formula which has the same
1201/// registers as the given formula.
1202bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
Preston Gurd25c3b6a2013-02-01 20:41:27 +00001203 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
Dan Gohman20fab452010-05-19 23:43:12 +00001204 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1205 // Unstable sort by host order ok, because this is only used for uniquifying.
1206 std::sort(Key.begin(), Key.end());
1207 return Uniquifier.count(Key);
1208}
1209
Dan Gohman45774ce2010-02-12 10:34:29 +00001210/// InsertFormula - If the given formula has not yet been inserted, add it to
1211/// the list, and return true. Return false otherwise.
Dan Gohman8c16b382010-02-22 04:11:59 +00001212bool LSRUse::InsertFormula(const Formula &F) {
Andrew Trick57243da2013-10-25 21:35:56 +00001213 if (!Formulae.empty() && RigidFormula)
1214 return false;
1215
Preston Gurd25c3b6a2013-02-01 20:41:27 +00001216 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
Dan Gohman45774ce2010-02-12 10:34:29 +00001217 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1218 // Unstable sort by host order ok, because this is only used for uniquifying.
1219 std::sort(Key.begin(), Key.end());
1220
1221 if (!Uniquifier.insert(Key).second)
1222 return false;
1223
1224 // Using a register to hold the value of 0 is not profitable.
1225 assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1226 "Zero allocated in a scaled register!");
1227#ifndef NDEBUG
1228 for (SmallVectorImpl<const SCEV *>::const_iterator I =
1229 F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
1230 assert(!(*I)->isZero() && "Zero allocated in a base register!");
1231#endif
1232
1233 // Add the formula to the list.
1234 Formulae.push_back(F);
1235
1236 // Record registers now being used by this use.
Dan Gohman45774ce2010-02-12 10:34:29 +00001237 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1238
1239 return true;
Dan Gohman045f8192010-01-22 00:46:49 +00001240}
1241
Dan Gohmanf1c7b1b2010-05-18 22:39:15 +00001242/// DeleteFormula - Remove the given formula from this use's list.
1243void LSRUse::DeleteFormula(Formula &F) {
Dan Gohman80a96082010-05-20 15:17:54 +00001244 if (&F != &Formulae.back())
1245 std::swap(F, Formulae.back());
Dan Gohmanf1c7b1b2010-05-18 22:39:15 +00001246 Formulae.pop_back();
1247}
1248
Dan Gohman4cf99b52010-05-18 23:42:37 +00001249/// RecomputeRegs - Recompute the Regs field, and update RegUses.
1250void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1251 // Now that we've filtered out some formulae, recompute the Regs set.
1252 SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
1253 Regs.clear();
Dan Gohman927bcaa2010-05-20 20:33:18 +00001254 for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
1255 E = Formulae.end(); I != E; ++I) {
1256 const Formula &F = *I;
Dan Gohman4cf99b52010-05-18 23:42:37 +00001257 if (F.ScaledReg) Regs.insert(F.ScaledReg);
1258 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1259 }
1260
1261 // Update the RegTracker.
1262 for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
1263 E = OldRegs.end(); I != E; ++I)
1264 if (!Regs.count(*I))
1265 RegUses.DropRegister(*I, LUIdx);
1266}
1267
Dan Gohman45774ce2010-02-12 10:34:29 +00001268void LSRUse::print(raw_ostream &OS) const {
1269 OS << "LSR Use: Kind=";
1270 switch (Kind) {
1271 case Basic: OS << "Basic"; break;
1272 case Special: OS << "Special"; break;
1273 case ICmpZero: OS << "ICmpZero"; break;
1274 case Address:
1275 OS << "Address of ";
Duncan Sands19d0b472010-02-16 11:11:14 +00001276 if (AccessTy->isPointerTy())
Dan Gohman45774ce2010-02-12 10:34:29 +00001277 OS << "pointer"; // the full pointer type could be really verbose
1278 else
1279 OS << *AccessTy;
Evan Cheng133694d2007-10-25 09:11:16 +00001280 }
1281
Dan Gohman45774ce2010-02-12 10:34:29 +00001282 OS << ", Offsets={";
1283 for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
1284 E = Offsets.end(); I != E; ++I) {
1285 OS << *I;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00001286 if (std::next(I) != E)
Dan Gohman45774ce2010-02-12 10:34:29 +00001287 OS << ',';
Dan Gohman045f8192010-01-22 00:46:49 +00001288 }
Dan Gohman45774ce2010-02-12 10:34:29 +00001289 OS << '}';
Dan Gohman045f8192010-01-22 00:46:49 +00001290
Dan Gohman45774ce2010-02-12 10:34:29 +00001291 if (AllFixupsOutsideLoop)
1292 OS << ", all-fixups-outside-loop";
Dan Gohman14152082010-07-15 20:24:58 +00001293
1294 if (WidestFixupType)
1295 OS << ", widest fixup type: " << *WidestFixupType;
Dan Gohman045f8192010-01-22 00:46:49 +00001296}
1297
Manman Ren49d684e2012-09-12 05:06:18 +00001298#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Dan Gohman45774ce2010-02-12 10:34:29 +00001299void LSRUse::dump() const {
1300 print(errs()); errs() << '\n';
1301}
Manman Renc3366cc2012-09-06 19:55:56 +00001302#endif
Dan Gohman045f8192010-01-22 00:46:49 +00001303
Dan Gohman45774ce2010-02-12 10:34:29 +00001304/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
1305/// be completely folded into the user instruction at isel time. This includes
1306/// address-mode folding and special icmp tricks.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001307static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
1308 Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
1309 bool HasBaseReg, int64_t Scale) {
Dan Gohman45774ce2010-02-12 10:34:29 +00001310 switch (Kind) {
1311 case LSRUse::Address:
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001312 return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
Dan Gohman45774ce2010-02-12 10:34:29 +00001313
1314 // Otherwise, just guess that reg+reg addressing is legal.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001315 //return ;
Dan Gohman45774ce2010-02-12 10:34:29 +00001316
1317 case LSRUse::ICmpZero:
1318 // There's not even a target hook for querying whether it would be legal to
1319 // fold a GV into an ICmp.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001320 if (BaseGV)
Dan Gohman45774ce2010-02-12 10:34:29 +00001321 return false;
1322
1323 // ICmp only has two operands; don't allow more than two non-trivial parts.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001324 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
Dan Gohman45774ce2010-02-12 10:34:29 +00001325 return false;
1326
1327 // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1328 // putting the scaled register in the other operand of the icmp.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001329 if (Scale != 0 && Scale != -1)
Dan Gohman45774ce2010-02-12 10:34:29 +00001330 return false;
1331
1332 // If we have low-level target information, ask the target if it can fold an
1333 // integer immediate on an icmp.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001334 if (BaseOffset != 0) {
Jakob Stoklund Olesenf2390e82012-04-05 03:10:56 +00001335 // We have one of:
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001336 // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1337 // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
Jakob Stoklund Olesenf2390e82012-04-05 03:10:56 +00001338 // Offs is the ICmp immediate.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001339 if (Scale == 0)
1340 // The cast does the right thing with INT64_MIN.
1341 BaseOffset = -(uint64_t)BaseOffset;
1342 return TTI.isLegalICmpImmediate(BaseOffset);
Dan Gohman045f8192010-01-22 00:46:49 +00001343 }
Dan Gohman45774ce2010-02-12 10:34:29 +00001344
Jakob Stoklund Olesenf2390e82012-04-05 03:10:56 +00001345 // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
Dan Gohman45774ce2010-02-12 10:34:29 +00001346 return true;
1347
1348 case LSRUse::Basic:
1349 // Only handle single-register values.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001350 return !BaseGV && Scale == 0 && BaseOffset == 0;
Dan Gohman45774ce2010-02-12 10:34:29 +00001351
1352 case LSRUse::Special:
Andrew Trickaca8fb32012-06-15 20:07:26 +00001353 // Special case Basic to handle -1 scales.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001354 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
Dan Gohman045f8192010-01-22 00:46:49 +00001355 }
1356
David Blaikie46a9f012012-01-20 21:51:11 +00001357 llvm_unreachable("Invalid LSRUse Kind!");
Dan Gohman045f8192010-01-22 00:46:49 +00001358}
1359
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001360static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1361 int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1362 GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
1363 int64_t Scale) {
Dan Gohman45774ce2010-02-12 10:34:29 +00001364 // Check for overflow.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001365 if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
Dan Gohman45774ce2010-02-12 10:34:29 +00001366 (MinOffset > 0))
1367 return false;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001368 MinOffset = (uint64_t)BaseOffset + MinOffset;
1369 if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1370 (MaxOffset > 0))
1371 return false;
1372 MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1373
1374 return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
1375 Scale) &&
1376 isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
Dan Gohman045f8192010-01-22 00:46:49 +00001377}
1378
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001379static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1380 int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1381 const Formula &F) {
Chandler Carruth6e479322013-01-07 15:04:40 +00001382 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1383 F.BaseOffset, F.HasBaseReg, F.Scale);
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001384}
1385
Quentin Colombet8aa7abe2013-05-31 17:20:29 +00001386static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
1387 const Formula &F) {
1388 // If F is used as an Addressing Mode, it may fold one Base plus one
1389 // scaled register. If the scaled register is nil, do as if another
1390 // element of the base regs is a 1-scaled register.
1391 // This is possible if BaseRegs has at least 2 registers.
1392
1393 // If this is not an address calculation, this is not an addressing mode
1394 // use.
1395 if (LU.Kind != LSRUse::Address)
1396 return false;
1397
1398 // F is already scaled.
1399 if (F.Scale != 0)
1400 return false;
1401
1402 // We need to keep one register for the base and one to scale.
1403 if (F.BaseRegs.size() < 2)
1404 return false;
1405
1406 return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
1407 F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
1408 }
1409
Quentin Colombetbf490d42013-05-31 21:29:03 +00001410static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
1411 const LSRUse &LU, const Formula &F) {
1412 if (!F.Scale)
1413 return 0;
1414 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1415 LU.AccessTy, F) && "Illegal formula in use.");
1416
1417 switch (LU.Kind) {
1418 case LSRUse::Address: {
Quentin Colombet145eb972013-06-19 19:59:41 +00001419 // Check the scaling factor cost with both the min and max offsets.
1420 int ScaleCostMinOffset =
1421 TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
1422 F.BaseOffset + LU.MinOffset,
1423 F.HasBaseReg, F.Scale);
1424 int ScaleCostMaxOffset =
1425 TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
1426 F.BaseOffset + LU.MaxOffset,
1427 F.HasBaseReg, F.Scale);
1428
1429 assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1430 "Legal addressing mode has an illegal cost!");
1431 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
Quentin Colombetbf490d42013-05-31 21:29:03 +00001432 }
1433 case LSRUse::ICmpZero:
1434 // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
Andrew Trick57243da2013-10-25 21:35:56 +00001435 // Therefore, return 0 in case F.Scale == -1.
Quentin Colombetbf490d42013-05-31 21:29:03 +00001436 return F.Scale != -1;
1437
1438 case LSRUse::Basic:
1439 case LSRUse::Special:
1440 return 0;
1441 }
1442
1443 llvm_unreachable("Invalid LSRUse Kind!");
1444}
1445
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001446static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
Chris Lattner229907c2011-07-18 04:54:35 +00001447 LSRUse::KindType Kind, Type *AccessTy,
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001448 GlobalValue *BaseGV, int64_t BaseOffset,
1449 bool HasBaseReg) {
Dan Gohman45774ce2010-02-12 10:34:29 +00001450 // Fast-path: zero is always foldable.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001451 if (BaseOffset == 0 && !BaseGV) return true;
Dan Gohman045f8192010-01-22 00:46:49 +00001452
Dan Gohman45774ce2010-02-12 10:34:29 +00001453 // Conservatively, create an address with an immediate and a
1454 // base and a scale.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001455 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
Dan Gohman045f8192010-01-22 00:46:49 +00001456
Dan Gohman20fab452010-05-19 23:43:12 +00001457 // Canonicalize a scale of 1 to a base register if the formula doesn't
1458 // already have a base register.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001459 if (!HasBaseReg && Scale == 1) {
1460 Scale = 0;
1461 HasBaseReg = true;
Dan Gohman20fab452010-05-19 23:43:12 +00001462 }
1463
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001464 return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
Dan Gohman045f8192010-01-22 00:46:49 +00001465}
1466
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001467static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1468 ScalarEvolution &SE, int64_t MinOffset,
1469 int64_t MaxOffset, LSRUse::KindType Kind,
1470 Type *AccessTy, const SCEV *S, bool HasBaseReg) {
Dan Gohman45774ce2010-02-12 10:34:29 +00001471 // Fast-path: zero is always foldable.
1472 if (S->isZero()) return true;
1473
1474 // Conservatively, create an address with an immediate and a
1475 // base and a scale.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001476 int64_t BaseOffset = ExtractImmediate(S, SE);
Dan Gohman45774ce2010-02-12 10:34:29 +00001477 GlobalValue *BaseGV = ExtractSymbol(S, SE);
1478
1479 // If there's anything else involved, it's not foldable.
1480 if (!S->isZero()) return false;
1481
1482 // Fast-path: zero is always foldable.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001483 if (BaseOffset == 0 && !BaseGV) return true;
Dan Gohman45774ce2010-02-12 10:34:29 +00001484
1485 // Conservatively, create an address with an immediate and a
1486 // base and a scale.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001487 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
Dan Gohman45774ce2010-02-12 10:34:29 +00001488
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001489 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1490 BaseOffset, HasBaseReg, Scale);
Dan Gohman045f8192010-01-22 00:46:49 +00001491}
1492
Dan Gohman297fb8b2010-06-19 21:21:39 +00001493namespace {
1494
Andrew Trick29fe5f02012-01-09 19:50:34 +00001495/// IVInc - An individual increment in a Chain of IV increments.
1496/// Relate an IV user to an expression that computes the IV it uses from the IV
1497/// used by the previous link in the Chain.
1498///
1499/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1500/// original IVOperand. The head of the chain's IVOperand is only valid during
1501/// chain collection, before LSR replaces IV users. During chain generation,
1502/// IncExpr can be used to find the new IVOperand that computes the same
1503/// expression.
1504struct IVInc {
1505 Instruction *UserInst;
1506 Value* IVOperand;
1507 const SCEV *IncExpr;
1508
1509 IVInc(Instruction *U, Value *O, const SCEV *E):
1510 UserInst(U), IVOperand(O), IncExpr(E) {}
1511};
1512
1513// IVChain - The list of IV increments in program order.
1514// We typically add the head of a chain without finding subsequent links.
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00001515struct IVChain {
1516 SmallVector<IVInc,1> Incs;
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00001517 const SCEV *ExprBase;
1518
1519 IVChain() : ExprBase(0) {}
1520
1521 IVChain(const IVInc &Head, const SCEV *Base)
1522 : Incs(1, Head), ExprBase(Base) {}
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00001523
1524 typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
1525
1526 // begin - return the first increment in the chain.
1527 const_iterator begin() const {
1528 assert(!Incs.empty());
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00001529 return std::next(Incs.begin());
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00001530 }
1531 const_iterator end() const {
1532 return Incs.end();
1533 }
1534
1535 // hasIncs - Returns true if this chain contains any increments.
1536 bool hasIncs() const { return Incs.size() >= 2; }
1537
1538 // add - Add an IVInc to the end of this chain.
1539 void add(const IVInc &X) { Incs.push_back(X); }
1540
1541 // tailUserInst - Returns the last UserInst in the chain.
1542 Instruction *tailUserInst() const { return Incs.back().UserInst; }
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00001543
1544 // isProfitableIncrement - Returns true if IncExpr can be profitably added to
1545 // this chain.
1546 bool isProfitableIncrement(const SCEV *OperExpr,
1547 const SCEV *IncExpr,
1548 ScalarEvolution&);
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00001549};
Andrew Trick29fe5f02012-01-09 19:50:34 +00001550
1551/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
1552/// Distinguish between FarUsers that definitely cross IV increments and
1553/// NearUsers that may be used between IV increments.
1554struct ChainUsers {
1555 SmallPtrSet<Instruction*, 4> FarUsers;
1556 SmallPtrSet<Instruction*, 4> NearUsers;
1557};
1558
Dan Gohman45774ce2010-02-12 10:34:29 +00001559/// LSRInstance - This class holds state for the main loop strength reduction
1560/// logic.
1561class LSRInstance {
1562 IVUsers &IU;
1563 ScalarEvolution &SE;
1564 DominatorTree &DT;
Dan Gohman607e02b2010-04-09 22:07:05 +00001565 LoopInfo &LI;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001566 const TargetTransformInfo &TTI;
Dan Gohman45774ce2010-02-12 10:34:29 +00001567 Loop *const L;
1568 bool Changed;
1569
1570 /// IVIncInsertPos - This is the insert position that the current loop's
1571 /// induction variable increment should be placed. In simple loops, this is
1572 /// the latch block's terminator. But in more complicated cases, this is a
1573 /// position which will dominate all the in-loop post-increment users.
1574 Instruction *IVIncInsertPos;
1575
1576 /// Factors - Interesting factors between use strides.
1577 SmallSetVector<int64_t, 8> Factors;
1578
1579 /// Types - Interesting use types, to facilitate truncation reuse.
Chris Lattner229907c2011-07-18 04:54:35 +00001580 SmallSetVector<Type *, 4> Types;
Dan Gohman45774ce2010-02-12 10:34:29 +00001581
1582 /// Fixups - The list of operands which are to be replaced.
1583 SmallVector<LSRFixup, 16> Fixups;
1584
1585 /// Uses - The list of interesting uses.
1586 SmallVector<LSRUse, 16> Uses;
1587
1588 /// RegUses - Track which uses use which register candidates.
1589 RegUseTracker RegUses;
1590
Andrew Trick29fe5f02012-01-09 19:50:34 +00001591 // Limit the number of chains to avoid quadratic behavior. We don't expect to
1592 // have more than a few IV increment chains in a loop. Missing a Chain falls
1593 // back to normal LSR behavior for those uses.
1594 static const unsigned MaxChains = 8;
1595
1596 /// IVChainVec - IV users can form a chain of IV increments.
1597 SmallVector<IVChain, MaxChains> IVChainVec;
1598
Andrew Trick248d4102012-01-09 21:18:52 +00001599 /// IVIncSet - IV users that belong to profitable IVChains.
1600 SmallPtrSet<Use*, MaxChains> IVIncSet;
1601
Dan Gohman45774ce2010-02-12 10:34:29 +00001602 void OptimizeShadowIV();
1603 bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
1604 ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
Dan Gohman4c4043c2010-05-20 20:05:31 +00001605 void OptimizeLoopTermCond();
Dan Gohman45774ce2010-02-12 10:34:29 +00001606
Andrew Trick29fe5f02012-01-09 19:50:34 +00001607 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
1608 SmallVectorImpl<ChainUsers> &ChainUsersVec);
Andrew Trick248d4102012-01-09 21:18:52 +00001609 void FinalizeChain(IVChain &Chain);
Andrew Trick29fe5f02012-01-09 19:50:34 +00001610 void CollectChains();
Andrew Trick248d4102012-01-09 21:18:52 +00001611 void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
1612 SmallVectorImpl<WeakVH> &DeadInsts);
Andrew Trick29fe5f02012-01-09 19:50:34 +00001613
Dan Gohman45774ce2010-02-12 10:34:29 +00001614 void CollectInterestingTypesAndFactors();
1615 void CollectFixupsAndInitialFormulae();
1616
1617 LSRFixup &getNewFixup() {
1618 Fixups.push_back(LSRFixup());
1619 return Fixups.back();
1620 }
1621
1622 // Support for sharing of LSRUses between LSRFixups.
Benjamin Kramer62fb0cf2014-03-15 17:17:48 +00001623 typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
Dan Gohman45774ce2010-02-12 10:34:29 +00001624 UseMapTy UseMap;
1625
Dan Gohman110ed642010-09-01 01:45:53 +00001626 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
Chris Lattner229907c2011-07-18 04:54:35 +00001627 LSRUse::KindType Kind, Type *AccessTy);
Dan Gohman45774ce2010-02-12 10:34:29 +00001628
1629 std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
1630 LSRUse::KindType Kind,
Chris Lattner229907c2011-07-18 04:54:35 +00001631 Type *AccessTy);
Dan Gohman45774ce2010-02-12 10:34:29 +00001632
Dan Gohmana7b68d62010-10-07 23:33:43 +00001633 void DeleteUse(LSRUse &LU, size_t LUIdx);
Dan Gohman80a96082010-05-20 15:17:54 +00001634
Dan Gohman110ed642010-09-01 01:45:53 +00001635 LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
Dan Gohman20fab452010-05-19 23:43:12 +00001636
Dan Gohman8c16b382010-02-22 04:11:59 +00001637 void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
Dan Gohman45774ce2010-02-12 10:34:29 +00001638 void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1639 void CountRegisters(const Formula &F, size_t LUIdx);
1640 bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
1641
1642 void CollectLoopInvariantFixupsAndFormulae();
1643
1644 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
1645 unsigned Depth = 0);
1646 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
1647 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1648 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1649 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1650 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1651 void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
1652 void GenerateCrossUseConstantOffsets();
1653 void GenerateAllReuseFormulae();
1654
1655 void FilterOutUndesirableDedicatedRegisters();
Dan Gohmana4eca052010-05-18 22:51:59 +00001656
1657 size_t EstimateSearchSpaceComplexity() const;
Dan Gohmane9e08732010-08-29 16:09:42 +00001658 void NarrowSearchSpaceByDetectingSupersets();
1659 void NarrowSearchSpaceByCollapsingUnrolledCode();
Dan Gohman002ff892010-08-29 16:39:22 +00001660 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
Dan Gohmane9e08732010-08-29 16:09:42 +00001661 void NarrowSearchSpaceByPickingWinnerRegs();
Dan Gohman45774ce2010-02-12 10:34:29 +00001662 void NarrowSearchSpaceUsingHeuristics();
1663
1664 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
1665 Cost &SolutionCost,
1666 SmallVectorImpl<const Formula *> &Workspace,
1667 const Cost &CurCost,
1668 const SmallPtrSet<const SCEV *, 16> &CurRegs,
1669 DenseSet<const SCEV *> &VisitedRegs) const;
1670 void Solve(SmallVectorImpl<const Formula *> &Solution) const;
1671
Dan Gohman607e02b2010-04-09 22:07:05 +00001672 BasicBlock::iterator
1673 HoistInsertPosition(BasicBlock::iterator IP,
1674 const SmallVectorImpl<Instruction *> &Inputs) const;
Andrew Trickc908b432012-01-20 07:41:13 +00001675 BasicBlock::iterator
1676 AdjustInsertPositionForExpand(BasicBlock::iterator IP,
1677 const LSRFixup &LF,
1678 const LSRUse &LU,
1679 SCEVExpander &Rewriter) const;
Dan Gohmand2df6432010-04-09 02:00:38 +00001680
Dan Gohman45774ce2010-02-12 10:34:29 +00001681 Value *Expand(const LSRFixup &LF,
1682 const Formula &F,
Dan Gohman8c16b382010-02-22 04:11:59 +00001683 BasicBlock::iterator IP,
Dan Gohman45774ce2010-02-12 10:34:29 +00001684 SCEVExpander &Rewriter,
Dan Gohman8c16b382010-02-22 04:11:59 +00001685 SmallVectorImpl<WeakVH> &DeadInsts) const;
Dan Gohman6deab962010-02-16 20:25:07 +00001686 void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
1687 const Formula &F,
Dan Gohman6deab962010-02-16 20:25:07 +00001688 SCEVExpander &Rewriter,
1689 SmallVectorImpl<WeakVH> &DeadInsts,
Dan Gohman6deab962010-02-16 20:25:07 +00001690 Pass *P) const;
Dan Gohman45774ce2010-02-12 10:34:29 +00001691 void Rewrite(const LSRFixup &LF,
1692 const Formula &F,
Dan Gohman45774ce2010-02-12 10:34:29 +00001693 SCEVExpander &Rewriter,
1694 SmallVectorImpl<WeakVH> &DeadInsts,
Dan Gohman45774ce2010-02-12 10:34:29 +00001695 Pass *P) const;
1696 void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
1697 Pass *P);
1698
Andrew Trickdc18e382011-12-13 00:55:33 +00001699public:
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001700 LSRInstance(Loop *L, Pass *P);
Dan Gohman45774ce2010-02-12 10:34:29 +00001701
1702 bool getChanged() const { return Changed; }
1703
1704 void print_factors_and_types(raw_ostream &OS) const;
1705 void print_fixups(raw_ostream &OS) const;
1706 void print_uses(raw_ostream &OS) const;
1707 void print(raw_ostream &OS) const;
1708 void dump() const;
1709};
1710
1711}
1712
1713/// OptimizeShadowIV - If IV is used in a int-to-float cast
Dan Gohman8b0a4192010-03-01 17:49:51 +00001714/// inside the loop then try to eliminate the cast operation.
Dan Gohman45774ce2010-02-12 10:34:29 +00001715void LSRInstance::OptimizeShadowIV() {
1716 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1717 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1718 return;
1719
1720 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
1721 UI != E; /* empty */) {
1722 IVUsers::const_iterator CandidateUI = UI;
1723 ++UI;
1724 Instruction *ShadowUse = CandidateUI->getUser();
Jakub Staszak4898e622013-06-15 12:20:44 +00001725 Type *DestTy = 0;
Andrew Trick858e9f02011-07-21 01:05:01 +00001726 bool IsSigned = false;
Dan Gohman45774ce2010-02-12 10:34:29 +00001727
1728 /* If shadow use is a int->float cast then insert a second IV
1729 to eliminate this cast.
1730
1731 for (unsigned i = 0; i < n; ++i)
1732 foo((double)i);
1733
1734 is transformed into
1735
1736 double d = 0.0;
1737 for (unsigned i = 0; i < n; ++i, ++d)
1738 foo(d);
1739 */
Andrew Trick858e9f02011-07-21 01:05:01 +00001740 if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
1741 IsSigned = false;
Dan Gohman45774ce2010-02-12 10:34:29 +00001742 DestTy = UCast->getDestTy();
Andrew Trick858e9f02011-07-21 01:05:01 +00001743 }
1744 else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
1745 IsSigned = true;
Dan Gohman45774ce2010-02-12 10:34:29 +00001746 DestTy = SCast->getDestTy();
Andrew Trick858e9f02011-07-21 01:05:01 +00001747 }
Dan Gohman45774ce2010-02-12 10:34:29 +00001748 if (!DestTy) continue;
1749
Chandler Carruth26c59fa2013-01-07 14:41:08 +00001750 // If target does not support DestTy natively then do not apply
1751 // this transformation.
1752 if (!TTI.isTypeLegal(DestTy)) continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00001753
1754 PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
1755 if (!PH) continue;
1756 if (PH->getNumIncomingValues() != 2) continue;
1757
Chris Lattner229907c2011-07-18 04:54:35 +00001758 Type *SrcTy = PH->getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00001759 int Mantissa = DestTy->getFPMantissaWidth();
1760 if (Mantissa == -1) continue;
1761 if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
1762 continue;
1763
1764 unsigned Entry, Latch;
1765 if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
1766 Entry = 0;
1767 Latch = 1;
Dan Gohman045f8192010-01-22 00:46:49 +00001768 } else {
Dan Gohman45774ce2010-02-12 10:34:29 +00001769 Entry = 1;
1770 Latch = 0;
Dan Gohman045f8192010-01-22 00:46:49 +00001771 }
Dan Gohman045f8192010-01-22 00:46:49 +00001772
Dan Gohman45774ce2010-02-12 10:34:29 +00001773 ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
1774 if (!Init) continue;
Andrew Trick858e9f02011-07-21 01:05:01 +00001775 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
Andrew Trickbd243d02011-07-21 01:45:54 +00001776 (double)Init->getSExtValue() :
1777 (double)Init->getZExtValue());
Dan Gohman045f8192010-01-22 00:46:49 +00001778
Dan Gohman45774ce2010-02-12 10:34:29 +00001779 BinaryOperator *Incr =
1780 dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
1781 if (!Incr) continue;
1782 if (Incr->getOpcode() != Instruction::Add
1783 && Incr->getOpcode() != Instruction::Sub)
Dan Gohman045f8192010-01-22 00:46:49 +00001784 continue;
Dan Gohman045f8192010-01-22 00:46:49 +00001785
Dan Gohman45774ce2010-02-12 10:34:29 +00001786 /* Initialize new IV, double d = 0.0 in above example. */
Jakub Staszak4898e622013-06-15 12:20:44 +00001787 ConstantInt *C = 0;
Dan Gohman45774ce2010-02-12 10:34:29 +00001788 if (Incr->getOperand(0) == PH)
1789 C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1790 else if (Incr->getOperand(1) == PH)
1791 C = dyn_cast<ConstantInt>(Incr->getOperand(0));
Dan Gohman045f8192010-01-22 00:46:49 +00001792 else
Dan Gohman045f8192010-01-22 00:46:49 +00001793 continue;
1794
Dan Gohman45774ce2010-02-12 10:34:29 +00001795 if (!C) continue;
Dan Gohman045f8192010-01-22 00:46:49 +00001796
Dan Gohman45774ce2010-02-12 10:34:29 +00001797 // Ignore negative constants, as the code below doesn't handle them
1798 // correctly. TODO: Remove this restriction.
1799 if (!C->getValue().isStrictlyPositive()) continue;
Dan Gohman045f8192010-01-22 00:46:49 +00001800
Dan Gohman45774ce2010-02-12 10:34:29 +00001801 /* Add new PHINode. */
Jay Foad52131342011-03-30 11:28:46 +00001802 PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
Dan Gohman045f8192010-01-22 00:46:49 +00001803
Dan Gohman45774ce2010-02-12 10:34:29 +00001804 /* create new increment. '++d' in above example. */
1805 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
1806 BinaryOperator *NewIncr =
1807 BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
1808 Instruction::FAdd : Instruction::FSub,
1809 NewPH, CFP, "IV.S.next.", Incr);
Dan Gohman045f8192010-01-22 00:46:49 +00001810
Dan Gohman45774ce2010-02-12 10:34:29 +00001811 NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
1812 NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
Dan Gohman045f8192010-01-22 00:46:49 +00001813
Dan Gohman45774ce2010-02-12 10:34:29 +00001814 /* Remove cast operation */
1815 ShadowUse->replaceAllUsesWith(NewPH);
1816 ShadowUse->eraseFromParent();
Dan Gohman4c4043c2010-05-20 20:05:31 +00001817 Changed = true;
Dan Gohman45774ce2010-02-12 10:34:29 +00001818 break;
Dan Gohman045f8192010-01-22 00:46:49 +00001819 }
1820}
1821
1822/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
1823/// set the IV user and stride information and return true, otherwise return
1824/// false.
Dan Gohmanab5fb7f2010-05-20 19:44:23 +00001825bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
Dan Gohman45774ce2010-02-12 10:34:29 +00001826 for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
1827 if (UI->getUser() == Cond) {
1828 // NOTE: we could handle setcc instructions with multiple uses here, but
1829 // InstCombine does it as well for simple uses, it's not clear that it
1830 // occurs enough in real life to handle.
1831 CondUse = UI;
1832 return true;
1833 }
Dan Gohman045f8192010-01-22 00:46:49 +00001834 return false;
Evan Cheng133694d2007-10-25 09:11:16 +00001835}
1836
Dan Gohman045f8192010-01-22 00:46:49 +00001837/// OptimizeMax - Rewrite the loop's terminating condition if it uses
1838/// a max computation.
1839///
1840/// This is a narrow solution to a specific, but acute, problem. For loops
1841/// like this:
1842///
1843/// i = 0;
1844/// do {
1845/// p[i] = 0.0;
1846/// } while (++i < n);
1847///
1848/// the trip count isn't just 'n', because 'n' might not be positive. And
1849/// unfortunately this can come up even for loops where the user didn't use
1850/// a C do-while loop. For example, seemingly well-behaved top-test loops
1851/// will commonly be lowered like this:
///
1853/// if (n > 0) {
1854/// i = 0;
1855/// do {
1856/// p[i] = 0.0;
1857/// } while (++i < n);
1858/// }
1859///
1860/// and then it's possible for subsequent optimization to obscure the if
1861/// test in such a way that indvars can't find it.
1862///
1863/// When indvars can't find the if test in loops like this, it creates a
1864/// max expression, which allows it to give the loop a canonical
1865/// induction variable:
1866///
1867/// i = 0;
1868/// max = n < 1 ? 1 : n;
1869/// do {
1870/// p[i] = 0.0;
1871/// } while (++i != max);
1872///
1873/// Canonical induction variables are necessary because the loop passes
1874/// are designed around them. The most obvious example of this is the
1875/// LoopInfo analysis, which doesn't remember trip count values. It
1876/// expects to be able to rediscover the trip count each time it is
Dan Gohman45774ce2010-02-12 10:34:29 +00001877/// needed, and it does this using a simple analysis that only succeeds if
Dan Gohman045f8192010-01-22 00:46:49 +00001878/// the loop has a canonical induction variable.
1879///
1880/// However, when it comes time to generate code, the maximum operation
1881/// can be quite costly, especially if it's inside of an outer loop.
1882///
1883/// This function solves this problem by detecting this type of loop and
1884/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
1885/// the instructions for the maximum computation.
1886///
Dan Gohman45774ce2010-02-12 10:34:29 +00001887ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
Dan Gohman045f8192010-01-22 00:46:49 +00001888 // Check that the loop matches the pattern we're looking for.
1889 if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
1890 Cond->getPredicate() != CmpInst::ICMP_NE)
1891 return Cond;
Dan Gohman51ad99d2010-01-21 02:09:26 +00001892
Dan Gohman045f8192010-01-22 00:46:49 +00001893 SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
1894 if (!Sel || !Sel->hasOneUse()) return Cond;
Dan Gohman51ad99d2010-01-21 02:09:26 +00001895
Dan Gohman45774ce2010-02-12 10:34:29 +00001896 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
Dan Gohman045f8192010-01-22 00:46:49 +00001897 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1898 return Cond;
Dan Gohman1d2ded72010-05-03 22:09:21 +00001899 const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
Dan Gohman51ad99d2010-01-21 02:09:26 +00001900
Dan Gohman045f8192010-01-22 00:46:49 +00001901 // Add one to the backedge-taken count to get the trip count.
Dan Gohman9b7632d2010-08-16 15:39:27 +00001902 const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
Dan Gohman534ba372010-04-24 03:13:44 +00001903 if (IterationCount != SE.getSCEV(Sel)) return Cond;
Dan Gohman045f8192010-01-22 00:46:49 +00001904
Dan Gohman534ba372010-04-24 03:13:44 +00001905 // Check for a max calculation that matches the pattern. There's no check
1906 // for ICMP_ULE here because the comparison would be with zero, which
1907 // isn't interesting.
1908 CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1909 const SCEVNAryExpr *Max = 0;
1910 if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
1911 Pred = ICmpInst::ICMP_SLE;
1912 Max = S;
1913 } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
1914 Pred = ICmpInst::ICMP_SLT;
1915 Max = S;
1916 } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
1917 Pred = ICmpInst::ICMP_ULT;
1918 Max = U;
1919 } else {
1920 // No match; bail.
Dan Gohman045f8192010-01-22 00:46:49 +00001921 return Cond;
Dan Gohman534ba372010-04-24 03:13:44 +00001922 }
Dan Gohman045f8192010-01-22 00:46:49 +00001923
1924 // To handle a max with more than two operands, this optimization would
1925 // require additional checking and setup.
1926 if (Max->getNumOperands() != 2)
1927 return Cond;
1928
1929 const SCEV *MaxLHS = Max->getOperand(0);
1930 const SCEV *MaxRHS = Max->getOperand(1);
Dan Gohman534ba372010-04-24 03:13:44 +00001931
1932 // ScalarEvolution canonicalizes constants to the left. For < and >, look
1933 // for a comparison with 1. For <= and >=, a comparison with zero.
1934 if (!MaxLHS ||
1935 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
1936 return Cond;
1937
Dan Gohman045f8192010-01-22 00:46:49 +00001938 // Check the relevant induction variable for conformance to
1939 // the pattern.
Dan Gohman45774ce2010-02-12 10:34:29 +00001940 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
Dan Gohman045f8192010-01-22 00:46:49 +00001941 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
1942 if (!AR || !AR->isAffine() ||
1943 AR->getStart() != One ||
Dan Gohman45774ce2010-02-12 10:34:29 +00001944 AR->getStepRecurrence(SE) != One)
Dan Gohman045f8192010-01-22 00:46:49 +00001945 return Cond;
1946
1947 assert(AR->getLoop() == L &&
1948 "Loop condition operand is an addrec in a different loop!");
1949
1950 // Check the right operand of the select, and remember it, as it will
1951 // be used in the new comparison instruction.
1952 Value *NewRHS = 0;
Dan Gohman534ba372010-04-24 03:13:44 +00001953 if (ICmpInst::isTrueWhenEqual(Pred)) {
1954 // Look for n+1, and grab n.
1955 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
Jakub Staszakf6df1e32013-03-24 09:25:47 +00001956 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
1957 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
1958 NewRHS = BO->getOperand(0);
Dan Gohman534ba372010-04-24 03:13:44 +00001959 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
Jakub Staszakf6df1e32013-03-24 09:25:47 +00001960 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
1961 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
1962 NewRHS = BO->getOperand(0);
Dan Gohman534ba372010-04-24 03:13:44 +00001963 if (!NewRHS)
1964 return Cond;
1965 } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
Dan Gohman045f8192010-01-22 00:46:49 +00001966 NewRHS = Sel->getOperand(1);
Dan Gohman45774ce2010-02-12 10:34:29 +00001967 else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
Dan Gohman045f8192010-01-22 00:46:49 +00001968 NewRHS = Sel->getOperand(2);
Dan Gohman1081f1a2010-06-22 23:07:13 +00001969 else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
1970 NewRHS = SU->getValue();
Dan Gohman534ba372010-04-24 03:13:44 +00001971 else
Dan Gohman1081f1a2010-06-22 23:07:13 +00001972 // Max doesn't match expected pattern.
1973 return Cond;
Dan Gohman045f8192010-01-22 00:46:49 +00001974
1975 // Determine the new comparison opcode. It may be signed or unsigned,
1976 // and the original comparison may be either equality or inequality.
Dan Gohman045f8192010-01-22 00:46:49 +00001977 if (Cond->getPredicate() == CmpInst::ICMP_EQ)
1978 Pred = CmpInst::getInversePredicate(Pred);
1979
1980 // Ok, everything looks ok to change the condition into an SLT or SGE and
1981 // delete the max calculation.
1982 ICmpInst *NewCond =
1983 new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
1984
1985 // Delete the max calculation instructions.
1986 Cond->replaceAllUsesWith(NewCond);
1987 CondUse->setUser(NewCond);
1988 Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
1989 Cond->eraseFromParent();
1990 Sel->eraseFromParent();
1991 if (Cmp->use_empty())
1992 Cmp->eraseFromParent();
1993 return NewCond;
Dan Gohman68e77352008-09-15 21:22:06 +00001994}
1995
Jim Grosbach60f48542009-11-17 17:53:56 +00001996/// OptimizeLoopTermCond - Change loop terminating condition to use the
Evan Cheng85a9f432009-11-12 07:35:05 +00001997/// postinc iv when possible.
Dan Gohman4c4043c2010-05-20 20:05:31 +00001998void
Dan Gohman45774ce2010-02-12 10:34:29 +00001999LSRInstance::OptimizeLoopTermCond() {
2000 SmallPtrSet<Instruction *, 4> PostIncs;
2001
Evan Cheng85a9f432009-11-12 07:35:05 +00002002 BasicBlock *LatchBlock = L->getLoopLatch();
Evan Chengba4e5da72009-11-17 18:10:11 +00002003 SmallVector<BasicBlock*, 8> ExitingBlocks;
2004 L->getExitingBlocks(ExitingBlocks);
Jim Grosbach60f48542009-11-17 17:53:56 +00002005
Evan Chengba4e5da72009-11-17 18:10:11 +00002006 for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
2007 BasicBlock *ExitingBlock = ExitingBlocks[i];
Evan Cheng85a9f432009-11-12 07:35:05 +00002008
Dan Gohman45774ce2010-02-12 10:34:29 +00002009 // Get the terminating condition for the loop if possible. If we
Evan Chengba4e5da72009-11-17 18:10:11 +00002010 // can, we want to change it to use a post-incremented version of its
2011 // induction variable, to allow coalescing the live ranges for the IV into
2012 // one register value.
Evan Cheng85a9f432009-11-12 07:35:05 +00002013
Evan Chengba4e5da72009-11-17 18:10:11 +00002014 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2015 if (!TermBr)
2016 continue;
2017 // FIXME: Overly conservative, termination condition could be an 'or' etc..
2018 if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2019 continue;
Evan Cheng85a9f432009-11-12 07:35:05 +00002020
Evan Chengba4e5da72009-11-17 18:10:11 +00002021 // Search IVUsesByStride to find Cond's IVUse if there is one.
2022 IVStrideUse *CondUse = 0;
Evan Chengba4e5da72009-11-17 18:10:11 +00002023 ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
Dan Gohman45774ce2010-02-12 10:34:29 +00002024 if (!FindIVUserForCond(Cond, CondUse))
Evan Chengba4e5da72009-11-17 18:10:11 +00002025 continue;
2026
Evan Chengba4e5da72009-11-17 18:10:11 +00002027 // If the trip count is computed in terms of a max (due to ScalarEvolution
2028 // being unable to find a sufficient guard, for example), change the loop
2029 // comparison to use SLT or ULT instead of NE.
Dan Gohman45774ce2010-02-12 10:34:29 +00002030 // One consequence of doing this now is that it disrupts the count-down
2031 // optimization. That's not always a bad thing though, because in such
2032 // cases it may still be worthwhile to avoid a max.
2033 Cond = OptimizeMax(Cond, CondUse);
Evan Chengba4e5da72009-11-17 18:10:11 +00002034
Dan Gohman45774ce2010-02-12 10:34:29 +00002035 // If this exiting block dominates the latch block, it may also use
2036 // the post-inc value if it won't be shared with other uses.
2037 // Check for dominance.
2038 if (!DT.dominates(ExitingBlock, LatchBlock))
Dan Gohman045f8192010-01-22 00:46:49 +00002039 continue;
Evan Chengba4e5da72009-11-17 18:10:11 +00002040
Dan Gohman45774ce2010-02-12 10:34:29 +00002041 // Conservatively avoid trying to use the post-inc value in non-latch
2042 // exits if there may be pre-inc users in intervening blocks.
Dan Gohman2d0f96d2010-02-14 03:21:49 +00002043 if (LatchBlock != ExitingBlock)
Dan Gohman45774ce2010-02-12 10:34:29 +00002044 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2045 // Test if the use is reachable from the exiting block. This dominator
2046 // query is a conservative approximation of reachability.
2047 if (&*UI != CondUse &&
2048 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2049 // Conservatively assume there may be reuse if the quotient of their
2050 // strides could be a legal scale.
Dan Gohmane637ff52010-04-19 21:48:58 +00002051 const SCEV *A = IU.getStride(*CondUse, L);
2052 const SCEV *B = IU.getStride(*UI, L);
Dan Gohmand006ab92010-04-07 22:27:08 +00002053 if (!A || !B) continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00002054 if (SE.getTypeSizeInBits(A->getType()) !=
2055 SE.getTypeSizeInBits(B->getType())) {
2056 if (SE.getTypeSizeInBits(A->getType()) >
2057 SE.getTypeSizeInBits(B->getType()))
2058 B = SE.getSignExtendExpr(B, A->getType());
2059 else
2060 A = SE.getSignExtendExpr(A, B->getType());
2061 }
2062 if (const SCEVConstant *D =
Dan Gohman4eebb942010-02-19 19:35:48 +00002063 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
Dan Gohman86110fa2010-05-20 22:25:20 +00002064 const ConstantInt *C = D->getValue();
Dan Gohman45774ce2010-02-12 10:34:29 +00002065 // Stride of one or negative one can have reuse with non-addresses.
Dan Gohman86110fa2010-05-20 22:25:20 +00002066 if (C->isOne() || C->isAllOnesValue())
Dan Gohman45774ce2010-02-12 10:34:29 +00002067 goto decline_post_inc;
2068 // Avoid weird situations.
Dan Gohman86110fa2010-05-20 22:25:20 +00002069 if (C->getValue().getMinSignedBits() >= 64 ||
2070 C->getValue().isMinSignedValue())
Dan Gohman45774ce2010-02-12 10:34:29 +00002071 goto decline_post_inc;
2072 // Check for possible scaled-address reuse.
Chris Lattner229907c2011-07-18 04:54:35 +00002073 Type *AccessTy = getAccessType(UI->getUser());
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002074 int64_t Scale = C->getSExtValue();
2075 if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
2076 /*BaseOffset=*/ 0,
2077 /*HasBaseReg=*/ false, Scale))
Dan Gohman45774ce2010-02-12 10:34:29 +00002078 goto decline_post_inc;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002079 Scale = -Scale;
2080 if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
2081 /*BaseOffset=*/ 0,
2082 /*HasBaseReg=*/ false, Scale))
Dan Gohman45774ce2010-02-12 10:34:29 +00002083 goto decline_post_inc;
2084 }
2085 }
2086
David Greene2330f782009-12-23 22:58:38 +00002087 DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
Dan Gohman45774ce2010-02-12 10:34:29 +00002088 << *Cond << '\n');
Evan Chengba4e5da72009-11-17 18:10:11 +00002089
2090 // It's possible for the setcc instruction to be anywhere in the loop, and
2091 // possible for it to have multiple users. If it is not immediately before
2092 // the exiting block branch, move it.
Dan Gohman45774ce2010-02-12 10:34:29 +00002093 if (&*++BasicBlock::iterator(Cond) != TermBr) {
2094 if (Cond->hasOneUse()) {
Evan Chengba4e5da72009-11-17 18:10:11 +00002095 Cond->moveBefore(TermBr);
2096 } else {
Dan Gohman45774ce2010-02-12 10:34:29 +00002097 // Clone the terminating condition and insert into the loopend.
2098 ICmpInst *OldCond = Cond;
Evan Chengba4e5da72009-11-17 18:10:11 +00002099 Cond = cast<ICmpInst>(Cond->clone());
2100 Cond->setName(L->getHeader()->getName() + ".termcond");
2101 ExitingBlock->getInstList().insert(TermBr, Cond);
2102
2103 // Clone the IVUse, as the old use still exists!
Andrew Trickfc4ccb22011-06-21 15:43:52 +00002104 CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
Dan Gohman45774ce2010-02-12 10:34:29 +00002105 TermBr->replaceUsesOfWith(OldCond, Cond);
Evan Chengba4e5da72009-11-17 18:10:11 +00002106 }
Evan Cheng85a9f432009-11-12 07:35:05 +00002107 }
2108
Evan Chengba4e5da72009-11-17 18:10:11 +00002109 // If we get to here, we know that we can transform the setcc instruction to
2110 // use the post-incremented version of the IV, allowing us to coalesce the
2111 // live ranges for the IV correctly.
Dan Gohmand006ab92010-04-07 22:27:08 +00002112 CondUse->transformToPostInc(L);
Evan Chengba4e5da72009-11-17 18:10:11 +00002113 Changed = true;
2114
Dan Gohman45774ce2010-02-12 10:34:29 +00002115 PostIncs.insert(Cond);
2116 decline_post_inc:;
Dan Gohman51ad99d2010-01-21 02:09:26 +00002117 }
Dan Gohman45774ce2010-02-12 10:34:29 +00002118
2119 // Determine an insertion point for the loop induction variable increment. It
2120 // must dominate all the post-inc comparisons we just set up, and it must
2121 // dominate the loop latch edge.
2122 IVIncInsertPos = L->getLoopLatch()->getTerminator();
2123 for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
2124 E = PostIncs.end(); I != E; ++I) {
2125 BasicBlock *BB =
2126 DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
2127 (*I)->getParent());
2128 if (BB == (*I)->getParent())
2129 IVIncInsertPos = *I;
2130 else if (BB != IVIncInsertPos->getParent())
2131 IVIncInsertPos = BB->getTerminator();
2132 }
Dan Gohman51ad99d2010-01-21 02:09:26 +00002133}
2134
Chris Lattner0ab5e2c2011-04-15 05:18:47 +00002135/// reconcileNewOffset - Determine if the given use can accommodate a fixup
Dan Gohmana4ca28a2010-05-20 20:52:00 +00002136/// at the given offset and other details. If so, update the use and
2137/// return true.
Dan Gohman45774ce2010-02-12 10:34:29 +00002138bool
Dan Gohman110ed642010-09-01 01:45:53 +00002139LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
Chris Lattner229907c2011-07-18 04:54:35 +00002140 LSRUse::KindType Kind, Type *AccessTy) {
Dan Gohman110ed642010-09-01 01:45:53 +00002141 int64_t NewMinOffset = LU.MinOffset;
2142 int64_t NewMaxOffset = LU.MaxOffset;
Chris Lattner229907c2011-07-18 04:54:35 +00002143 Type *NewAccessTy = AccessTy;
Dan Gohman045f8192010-01-22 00:46:49 +00002144
Dan Gohman45774ce2010-02-12 10:34:29 +00002145 // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2146 // something conservative, however this can pessimize in the case that one of
2147 // the uses will have all its uses outside the loop, for example.
2148 if (LU.Kind != Kind)
Dan Gohman045f8192010-01-22 00:46:49 +00002149 return false;
Dan Gohman45774ce2010-02-12 10:34:29 +00002150 // Conservatively assume HasBaseReg is true for now.
Dan Gohman110ed642010-09-01 01:45:53 +00002151 if (NewOffset < LU.MinOffset) {
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002152 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
2153 LU.MaxOffset - NewOffset, HasBaseReg))
Dan Gohman045f8192010-01-22 00:46:49 +00002154 return false;
Dan Gohman110ed642010-09-01 01:45:53 +00002155 NewMinOffset = NewOffset;
2156 } else if (NewOffset > LU.MaxOffset) {
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002157 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
2158 NewOffset - LU.MinOffset, HasBaseReg))
Dan Gohman045f8192010-01-22 00:46:49 +00002159 return false;
Dan Gohman110ed642010-09-01 01:45:53 +00002160 NewMaxOffset = NewOffset;
Dan Gohman51ad99d2010-01-21 02:09:26 +00002161 }
Dan Gohman45774ce2010-02-12 10:34:29 +00002162 // Check for a mismatched access type, and fall back conservatively as needed.
Dan Gohman32655902010-06-19 21:30:18 +00002163 // TODO: Be less conservative when the type is similar and can use the same
2164 // addressing modes.
Dan Gohman45774ce2010-02-12 10:34:29 +00002165 if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
Dan Gohman110ed642010-09-01 01:45:53 +00002166 NewAccessTy = Type::getVoidTy(AccessTy->getContext());
Dan Gohman51ad99d2010-01-21 02:09:26 +00002167
Dan Gohman45774ce2010-02-12 10:34:29 +00002168 // Update the use.
Dan Gohman110ed642010-09-01 01:45:53 +00002169 LU.MinOffset = NewMinOffset;
2170 LU.MaxOffset = NewMaxOffset;
2171 LU.AccessTy = NewAccessTy;
2172 if (NewOffset != LU.Offsets.back())
2173 LU.Offsets.push_back(NewOffset);
Dan Gohman29916e02010-01-21 22:42:49 +00002174 return true;
2175}
2176
Dan Gohman45774ce2010-02-12 10:34:29 +00002177/// getUse - Return an LSRUse index and an offset value for a fixup which
2178/// needs the given expression, with the given kind and optional access type.
Dan Gohman8b0a4192010-03-01 17:49:51 +00002179/// Either reuse an existing use or create a new one, as needed.
Dan Gohman45774ce2010-02-12 10:34:29 +00002180std::pair<size_t, int64_t>
2181LSRInstance::getUse(const SCEV *&Expr,
Chris Lattner229907c2011-07-18 04:54:35 +00002182 LSRUse::KindType Kind, Type *AccessTy) {
Dan Gohman45774ce2010-02-12 10:34:29 +00002183 const SCEV *Copy = Expr;
2184 int64_t Offset = ExtractImmediate(Expr, SE);
Evan Cheng85a9f432009-11-12 07:35:05 +00002185
Dan Gohman45774ce2010-02-12 10:34:29 +00002186 // Basic uses can't accept any offset, for example.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002187 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
2188 Offset, /*HasBaseReg=*/ true)) {
Dan Gohman45774ce2010-02-12 10:34:29 +00002189 Expr = Copy;
2190 Offset = 0;
2191 }
2192
2193 std::pair<UseMapTy::iterator, bool> P =
Benjamin Kramer62fb0cf2014-03-15 17:17:48 +00002194 UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
Dan Gohman45774ce2010-02-12 10:34:29 +00002195 if (!P.second) {
2196 // A use already existed with this base.
2197 size_t LUIdx = P.first->second;
2198 LSRUse &LU = Uses[LUIdx];
Dan Gohman110ed642010-09-01 01:45:53 +00002199 if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
Dan Gohman45774ce2010-02-12 10:34:29 +00002200 // Reuse this use.
2201 return std::make_pair(LUIdx, Offset);
2202 }
2203
2204 // Create a new use.
2205 size_t LUIdx = Uses.size();
2206 P.first->second = LUIdx;
2207 Uses.push_back(LSRUse(Kind, AccessTy));
2208 LSRUse &LU = Uses[LUIdx];
2209
Dan Gohman110ed642010-09-01 01:45:53 +00002210 // We don't need to track redundant offsets, but we don't need to go out
2211 // of our way here to avoid them.
2212 if (LU.Offsets.empty() || Offset != LU.Offsets.back())
2213 LU.Offsets.push_back(Offset);
2214
Dan Gohman45774ce2010-02-12 10:34:29 +00002215 LU.MinOffset = Offset;
2216 LU.MaxOffset = Offset;
2217 return std::make_pair(LUIdx, Offset);
2218}
2219
Dan Gohman80a96082010-05-20 15:17:54 +00002220/// DeleteUse - Delete the given use from the Uses list.
Dan Gohmana7b68d62010-10-07 23:33:43 +00002221void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
Dan Gohman110ed642010-09-01 01:45:53 +00002222 if (&LU != &Uses.back())
Dan Gohman80a96082010-05-20 15:17:54 +00002223 std::swap(LU, Uses.back());
2224 Uses.pop_back();
Dan Gohmana7b68d62010-10-07 23:33:43 +00002225
2226 // Update RegUses.
2227 RegUses.SwapAndDropUse(LUIdx, Uses.size());
Dan Gohman80a96082010-05-20 15:17:54 +00002228}
2229
Dan Gohman20fab452010-05-19 23:43:12 +00002230/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
2231/// a formula that has the same registers as the given formula.
2232LSRUse *
2233LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
Dan Gohman110ed642010-09-01 01:45:53 +00002234 const LSRUse &OrigLU) {
2235 // Search all uses for the formula. This could be more clever.
Dan Gohman20fab452010-05-19 23:43:12 +00002236 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2237 LSRUse &LU = Uses[LUIdx];
Dan Gohmanb6a520d2010-08-29 15:27:08 +00002238 // Check whether this use is close enough to OrigLU, to see whether it's
2239 // worthwhile looking through its formulae.
2240 // Ignore ICmpZero uses because they may contain formulae generated by
2241 // GenerateICmpZeroScales, in which case adding fixup offsets may
2242 // be invalid.
Dan Gohman20fab452010-05-19 23:43:12 +00002243 if (&LU != &OrigLU &&
2244 LU.Kind != LSRUse::ICmpZero &&
2245 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
Dan Gohman14152082010-07-15 20:24:58 +00002246 LU.WidestFixupType == OrigLU.WidestFixupType &&
Dan Gohman20fab452010-05-19 23:43:12 +00002247 LU.HasFormulaWithSameRegs(OrigF)) {
Dan Gohmanb6a520d2010-08-29 15:27:08 +00002248 // Scan through this use's formulae.
Dan Gohman927bcaa2010-05-20 20:33:18 +00002249 for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
2250 E = LU.Formulae.end(); I != E; ++I) {
2251 const Formula &F = *I;
Dan Gohmanb6a520d2010-08-29 15:27:08 +00002252 // Check to see if this formula has the same registers and symbols
2253 // as OrigF.
Dan Gohman20fab452010-05-19 23:43:12 +00002254 if (F.BaseRegs == OrigF.BaseRegs &&
2255 F.ScaledReg == OrigF.ScaledReg &&
Chandler Carruth6e479322013-01-07 15:04:40 +00002256 F.BaseGV == OrigF.BaseGV &&
2257 F.Scale == OrigF.Scale &&
Dan Gohman6136e942011-05-03 00:46:49 +00002258 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
Chandler Carruth6e479322013-01-07 15:04:40 +00002259 if (F.BaseOffset == 0)
Dan Gohman20fab452010-05-19 23:43:12 +00002260 return &LU;
Dan Gohmanb6a520d2010-08-29 15:27:08 +00002261 // This is the formula where all the registers and symbols matched;
2262 // there aren't going to be any others. Since we declined it, we
Benjamin Kramerbde91762012-06-02 10:20:22 +00002263 // can skip the rest of the formulae and proceed to the next LSRUse.
Dan Gohman20fab452010-05-19 23:43:12 +00002264 break;
2265 }
2266 }
2267 }
2268 }
2269
Dan Gohmanb6a520d2010-08-29 15:27:08 +00002270 // Nothing looked good.
Dan Gohman20fab452010-05-19 23:43:12 +00002271 return 0;
2272}
2273
Dan Gohman45774ce2010-02-12 10:34:29 +00002274void LSRInstance::CollectInterestingTypesAndFactors() {
2275 SmallSetVector<const SCEV *, 4> Strides;
2276
Dan Gohman2446f572010-02-19 00:05:23 +00002277 // Collect interesting types and strides.
Dan Gohmand006ab92010-04-07 22:27:08 +00002278 SmallVector<const SCEV *, 4> Worklist;
Dan Gohman45774ce2010-02-12 10:34:29 +00002279 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
Dan Gohmane637ff52010-04-19 21:48:58 +00002280 const SCEV *Expr = IU.getExpr(*UI);
Dan Gohman45774ce2010-02-12 10:34:29 +00002281
2282 // Collect interesting types.
Dan Gohmand006ab92010-04-07 22:27:08 +00002283 Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
Dan Gohman45774ce2010-02-12 10:34:29 +00002284
Dan Gohmand006ab92010-04-07 22:27:08 +00002285 // Add strides for mentioned loops.
2286 Worklist.push_back(Expr);
2287 do {
2288 const SCEV *S = Worklist.pop_back_val();
2289 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
Andrew Trickd97b83e2012-03-22 22:42:45 +00002290 if (AR->getLoop() == L)
Andrew Tricke8b4f402011-12-10 00:25:00 +00002291 Strides.insert(AR->getStepRecurrence(SE));
Dan Gohmand006ab92010-04-07 22:27:08 +00002292 Worklist.push_back(AR->getStart());
2293 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Dan Gohmandd41bba2010-06-21 19:47:52 +00002294 Worklist.append(Add->op_begin(), Add->op_end());
Dan Gohmand006ab92010-04-07 22:27:08 +00002295 }
2296 } while (!Worklist.empty());
Dan Gohman2446f572010-02-19 00:05:23 +00002297 }
2298
2299 // Compute interesting factors from the set of interesting strides.
2300 for (SmallSetVector<const SCEV *, 4>::const_iterator
2301 I = Strides.begin(), E = Strides.end(); I != E; ++I)
Dan Gohman45774ce2010-02-12 10:34:29 +00002302 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00002303 std::next(I); NewStrideIter != E; ++NewStrideIter) {
Dan Gohman2446f572010-02-19 00:05:23 +00002304 const SCEV *OldStride = *I;
Dan Gohman45774ce2010-02-12 10:34:29 +00002305 const SCEV *NewStride = *NewStrideIter;
Dan Gohman45774ce2010-02-12 10:34:29 +00002306
2307 if (SE.getTypeSizeInBits(OldStride->getType()) !=
2308 SE.getTypeSizeInBits(NewStride->getType())) {
2309 if (SE.getTypeSizeInBits(OldStride->getType()) >
2310 SE.getTypeSizeInBits(NewStride->getType()))
2311 NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2312 else
2313 OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2314 }
2315 if (const SCEVConstant *Factor =
Dan Gohman4eebb942010-02-19 19:35:48 +00002316 dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2317 SE, true))) {
Dan Gohman45774ce2010-02-12 10:34:29 +00002318 if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
2319 Factors.insert(Factor->getValue()->getValue().getSExtValue());
2320 } else if (const SCEVConstant *Factor =
Dan Gohman8c16b382010-02-22 04:11:59 +00002321 dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2322 NewStride,
Dan Gohman4eebb942010-02-19 19:35:48 +00002323 SE, true))) {
Dan Gohman45774ce2010-02-12 10:34:29 +00002324 if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
2325 Factors.insert(Factor->getValue()->getValue().getSExtValue());
2326 }
2327 }
Dan Gohman45774ce2010-02-12 10:34:29 +00002328
2329 // If all uses use the same type, don't bother looking for truncation-based
2330 // reuse.
2331 if (Types.size() == 1)
2332 Types.clear();
2333
2334 DEBUG(print_factors_and_types(dbgs()));
2335}
2336
Andrew Trick29fe5f02012-01-09 19:50:34 +00002337/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
2338/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
2339/// Instructions to IVStrideUses, we could partially skip this.
2340static User::op_iterator
2341findIVOperand(User::op_iterator OI, User::op_iterator OE,
2342 Loop *L, ScalarEvolution &SE) {
2343 for(; OI != OE; ++OI) {
2344 if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2345 if (!SE.isSCEVable(Oper->getType()))
2346 continue;
2347
2348 if (const SCEVAddRecExpr *AR =
2349 dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2350 if (AR->getLoop() == L)
2351 break;
2352 }
2353 }
2354 }
2355 return OI;
2356}
2357
2358/// getWideOperand - IVChain logic must consistenctly peek base TruncInst
2359/// operands, so wrap it in a convenient helper.
2360static Value *getWideOperand(Value *Oper) {
2361 if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2362 return Trunc->getOperand(0);
2363 return Oper;
2364}
2365
2366/// isCompatibleIVType - Return true if we allow an IV chain to include both
2367/// types.
2368static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2369 Type *LType = LVal->getType();
2370 Type *RType = RVal->getType();
2371 return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
2372}
2373
Andrew Trickd5d2db92012-01-10 01:45:08 +00002374/// getExprBase - Return an approximation of this SCEV expression's "base", or
2375/// NULL for any constant. Returning the expression itself is
2376/// conservative. Returning a deeper subexpression is more precise and valid as
2377/// long as it isn't less complex than another subexpression. For expressions
2378/// involving multiple unscaled values, we need to return the pointer-type
2379/// SCEVUnknown. This avoids forming chains across objects, such as:
2380/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
2381///
2382/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2383/// SCEVUnknown, we simply return the rightmost SCEV operand.
2384static const SCEV *getExprBase(const SCEV *S) {
2385 switch (S->getSCEVType()) {
2386 default: // uncluding scUnknown.
2387 return S;
2388 case scConstant:
2389 return 0;
2390 case scTruncate:
2391 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2392 case scZeroExtend:
2393 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2394 case scSignExtend:
2395 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2396 case scAddExpr: {
2397 // Skip over scaled operands (scMulExpr) to follow add operands as long as
2398 // there's nothing more complex.
2399 // FIXME: not sure if we want to recognize negation.
2400 const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2401 for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
2402 E(Add->op_begin()); I != E; ++I) {
2403 const SCEV *SubExpr = *I;
2404 if (SubExpr->getSCEVType() == scAddExpr)
2405 return getExprBase(SubExpr);
2406
2407 if (SubExpr->getSCEVType() != scMulExpr)
2408 return SubExpr;
2409 }
2410 return S; // all operands are scaled, be conservative.
2411 }
2412 case scAddRecExpr:
2413 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2414 }
2415}
2416
Andrew Trick248d4102012-01-09 21:18:52 +00002417/// Return true if the chain increment is profitable to expand into a loop
2418/// invariant value, which may require its own register. A profitable chain
2419/// increment will be an offset relative to the same base. We allow such offsets
2420/// to potentially be used as chain increment as long as it's not obviously
2421/// expensive to expand using real instructions.
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002422bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
2423 const SCEV *IncExpr,
2424 ScalarEvolution &SE) {
2425 // Aggressively form chains when -stress-ivchain.
Andrew Trick248d4102012-01-09 21:18:52 +00002426 if (StressIVChain)
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002427 return true;
Andrew Trick248d4102012-01-09 21:18:52 +00002428
Andrew Trickd5d2db92012-01-10 01:45:08 +00002429 // Do not replace a constant offset from IV head with a nonconstant IV
2430 // increment.
2431 if (!isa<SCEVConstant>(IncExpr)) {
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002432 const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
Andrew Trickd5d2db92012-01-10 01:45:08 +00002433 if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
2434 return 0;
2435 }
2436
2437 SmallPtrSet<const SCEV*, 8> Processed;
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002438 return !isHighCostExpansion(IncExpr, Processed, SE);
Andrew Trick248d4102012-01-09 21:18:52 +00002439}
2440
2441/// Return true if the number of registers needed for the chain is estimated to
2442/// be less than the number required for the individual IV users. First prohibit
2443/// any IV users that keep the IV live across increments (the Users set should
2444/// be empty). Next count the number and type of increments in the chain.
2445///
2446/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
2447/// effectively use postinc addressing modes. Only consider it profitable it the
2448/// increments can be computed in fewer registers when chained.
2449///
2450/// TODO: Consider IVInc free if it's already used in another chains.
2451static bool
2452isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002453 ScalarEvolution &SE, const TargetTransformInfo &TTI) {
Andrew Trick248d4102012-01-09 21:18:52 +00002454 if (StressIVChain)
2455 return true;
2456
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002457 if (!Chain.hasIncs())
Andrew Trickd5d2db92012-01-10 01:45:08 +00002458 return false;
2459
2460 if (!Users.empty()) {
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002461 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
Andrew Trickd5d2db92012-01-10 01:45:08 +00002462 for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
2463 E = Users.end(); I != E; ++I) {
2464 dbgs() << " " << **I << "\n";
2465 });
2466 return false;
2467 }
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002468 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
Andrew Trickd5d2db92012-01-10 01:45:08 +00002469
2470 // The chain itself may require a register, so intialize cost to 1.
2471 int cost = 1;
2472
2473 // A complete chain likely eliminates the need for keeping the original IV in
2474 // a register. LSR does not currently know how to form a complete chain unless
2475 // the header phi already exists.
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002476 if (isa<PHINode>(Chain.tailUserInst())
2477 && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
Andrew Trickd5d2db92012-01-10 01:45:08 +00002478 --cost;
2479 }
2480 const SCEV *LastIncExpr = 0;
2481 unsigned NumConstIncrements = 0;
2482 unsigned NumVarIncrements = 0;
2483 unsigned NumReusedIncrements = 0;
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002484 for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
Andrew Trickd5d2db92012-01-10 01:45:08 +00002485 I != E; ++I) {
2486
2487 if (I->IncExpr->isZero())
2488 continue;
2489
2490 // Incrementing by zero or some constant is neutral. We assume constants can
2491 // be folded into an addressing mode or an add's immediate operand.
2492 if (isa<SCEVConstant>(I->IncExpr)) {
2493 ++NumConstIncrements;
2494 continue;
2495 }
2496
2497 if (I->IncExpr == LastIncExpr)
2498 ++NumReusedIncrements;
2499 else
2500 ++NumVarIncrements;
2501
2502 LastIncExpr = I->IncExpr;
2503 }
2504 // An IV chain with a single increment is handled by LSR's postinc
2505 // uses. However, a chain with multiple increments requires keeping the IV's
2506 // value live longer than it needs to be if chained.
2507 if (NumConstIncrements > 1)
2508 --cost;
2509
2510 // Materializing increment expressions in the preheader that didn't exist in
2511 // the original code may cost a register. For example, sign-extended array
2512 // indices can produce ridiculous increments like this:
2513 // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
2514 cost += NumVarIncrements;
2515
2516 // Reusing variable increments likely saves a register to hold the multiple of
2517 // the stride.
2518 cost -= NumReusedIncrements;
2519
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002520 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
2521 << "\n");
Andrew Trickd5d2db92012-01-10 01:45:08 +00002522
2523 return cost < 0;
Andrew Trick248d4102012-01-09 21:18:52 +00002524}
2525
Andrew Trick29fe5f02012-01-09 19:50:34 +00002526/// ChainInstruction - Add this IV user to an existing chain or make it the head
2527/// of a new chain.
2528void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2529 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2530 // When IVs are used as types of varying widths, they are generally converted
2531 // to a wider type with some uses remaining narrow under a (free) trunc.
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002532 Value *const NextIV = getWideOperand(IVOper);
2533 const SCEV *const OperExpr = SE.getSCEV(NextIV);
2534 const SCEV *const OperExprBase = getExprBase(OperExpr);
Andrew Trick29fe5f02012-01-09 19:50:34 +00002535
2536 // Visit all existing chains. Check if its IVOper can be computed as a
2537 // profitable loop invariant increment from the last link in the Chain.
2538 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2539 const SCEV *LastIncExpr = 0;
2540 for (; ChainIdx < NChains; ++ChainIdx) {
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002541 IVChain &Chain = IVChainVec[ChainIdx];
2542
2543 // Prune the solution space aggressively by checking that both IV operands
2544 // are expressions that operate on the same unscaled SCEVUnknown. This
2545 // "base" will be canceled by the subsequent getMinusSCEV call. Checking
2546 // first avoids creating extra SCEV expressions.
2547 if (!StressIVChain && Chain.ExprBase != OperExprBase)
2548 continue;
2549
2550 Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
Andrew Trick29fe5f02012-01-09 19:50:34 +00002551 if (!isCompatibleIVType(PrevIV, NextIV))
2552 continue;
2553
Andrew Trick356a8962012-03-26 20:28:35 +00002554 // A phi node terminates a chain.
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002555 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
Andrew Trick29fe5f02012-01-09 19:50:34 +00002556 continue;
2557
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002558 // The increment must be loop-invariant so it can be kept in a register.
2559 const SCEV *PrevExpr = SE.getSCEV(PrevIV);
2560 const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
2561 if (!SE.isLoopInvariant(IncExpr, L))
2562 continue;
2563
2564 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
Andrew Trick29fe5f02012-01-09 19:50:34 +00002565 LastIncExpr = IncExpr;
2566 break;
2567 }
2568 }
2569 // If we haven't found a chain, create a new one, unless we hit the max. Don't
2570 // bother for phi nodes, because they must be last in the chain.
2571 if (ChainIdx == NChains) {
2572 if (isa<PHINode>(UserInst))
2573 return;
Andrew Trick248d4102012-01-09 21:18:52 +00002574 if (NChains >= MaxChains && !StressIVChain) {
Andrew Trick29fe5f02012-01-09 19:50:34 +00002575 DEBUG(dbgs() << "IV Chain Limit\n");
2576 return;
2577 }
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002578 LastIncExpr = OperExpr;
Andrew Trickb9c822a2012-01-20 21:23:40 +00002579 // IVUsers may have skipped over sign/zero extensions. We don't currently
2580 // attempt to form chains involving extensions unless they can be hoisted
2581 // into this loop's AddRec.
2582 if (!isa<SCEVAddRecExpr>(LastIncExpr))
2583 return;
Andrew Trick29fe5f02012-01-09 19:50:34 +00002584 ++NChains;
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002585 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
2586 OperExprBase));
Andrew Trick29fe5f02012-01-09 19:50:34 +00002587 ChainUsersVec.resize(NChains);
Jakob Stoklund Olesen293673d2012-04-25 18:01:32 +00002588 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
2589 << ") IV=" << *LastIncExpr << "\n");
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002590 } else {
Jakob Stoklund Olesen293673d2012-04-25 18:01:32 +00002591 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
2592 << ") IV+" << *LastIncExpr << "\n");
Jakob Stoklund Olesenc90abc82012-04-26 23:33:11 +00002593 // Add this IV user to the end of the chain.
2594 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
2595 }
Andrew Trickbc705902013-02-09 01:11:01 +00002596 IVChain &Chain = IVChainVec[ChainIdx];
Andrew Trick29fe5f02012-01-09 19:50:34 +00002597
2598 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
2599 // This chain's NearUsers become FarUsers.
2600 if (!LastIncExpr->isZero()) {
2601 ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
2602 NearUsers.end());
2603 NearUsers.clear();
2604 }
2605
2606 // All other uses of IVOperand become near uses of the chain.
2607 // We currently ignore intermediate values within SCEV expressions, assuming
2608 // they will eventually be used be the current chain, or can be computed
2609 // from one of the chain increments. To be more precise we could
2610 // transitively follow its user and only add leaf IV users to the set.
Chandler Carruthcdf47882014-03-09 03:16:01 +00002611 for (User *U : IVOper->users()) {
2612 Instruction *OtherUse = dyn_cast<Instruction>(U);
Andrew Trickbc705902013-02-09 01:11:01 +00002613 if (!OtherUse)
Andrew Tricke51feea2012-03-26 18:03:16 +00002614 continue;
Andrew Trickbc705902013-02-09 01:11:01 +00002615 // Uses in the chain will no longer be uses if the chain is formed.
2616 // Include the head of the chain in this iteration (not Chain.begin()).
2617 IVChain::const_iterator IncIter = Chain.Incs.begin();
2618 IVChain::const_iterator IncEnd = Chain.Incs.end();
2619 for( ; IncIter != IncEnd; ++IncIter) {
2620 if (IncIter->UserInst == OtherUse)
2621 break;
2622 }
2623 if (IncIter != IncEnd)
2624 continue;
2625
Andrew Trick29fe5f02012-01-09 19:50:34 +00002626 if (SE.isSCEVable(OtherUse->getType())
2627 && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
2628 && IU.isIVUserOrOperand(OtherUse)) {
2629 continue;
2630 }
Andrew Tricke51feea2012-03-26 18:03:16 +00002631 NearUsers.insert(OtherUse);
Andrew Trick29fe5f02012-01-09 19:50:34 +00002632 }
2633
2634 // Since this user is part of the chain, it's no longer considered a use
2635 // of the chain.
2636 ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
2637}
2638
2639/// CollectChains - Populate the vector of Chains.
2640///
2641/// This decreases ILP at the architecture level. Targets with ample registers,
2642/// multiple memory ports, and no register renaming probably don't want
2643/// this. However, such targets should probably disable LSR altogether.
2644///
2645/// The job of LSR is to make a reasonable choice of induction variables across
2646/// the loop. Subsequent passes can easily "unchain" computation exposing more
2647/// ILP *within the loop* if the target wants it.
2648///
2649/// Finding the best IV chain is potentially a scheduling problem. Since LSR
2650/// will not reorder memory operations, it will recognize this as a chain, but
2651/// will generate redundant IV increments. Ideally this would be corrected later
2652/// by a smart scheduler:
2653/// = A[i]
2654/// = A[i+x]
2655/// A[i] =
2656/// A[i+x] =
2657///
2658/// TODO: Walk the entire domtree within this loop, not just the path to the
2659/// loop latch. This will discover chains on side paths, but requires
2660/// maintaining multiple copies of the Chains state.
2661void LSRInstance::CollectChains() {
Jakob Stoklund Olesen293673d2012-04-25 18:01:32 +00002662 DEBUG(dbgs() << "Collecting IV Chains.\n");
Andrew Trick29fe5f02012-01-09 19:50:34 +00002663 SmallVector<ChainUsers, 8> ChainUsersVec;
2664
2665 SmallVector<BasicBlock *,8> LatchPath;
2666 BasicBlock *LoopHeader = L->getHeader();
2667 for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
2668 Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
2669 LatchPath.push_back(Rung->getBlock());
2670 }
2671 LatchPath.push_back(LoopHeader);
2672
2673 // Walk the instruction stream from the loop header to the loop latch.
2674 for (SmallVectorImpl<BasicBlock *>::reverse_iterator
2675 BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
2676 BBIter != BBEnd; ++BBIter) {
2677 for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
2678 I != E; ++I) {
2679 // Skip instructions that weren't seen by IVUsers analysis.
2680 if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
2681 continue;
2682
2683 // Ignore users that are part of a SCEV expression. This way we only
2684 // consider leaf IV Users. This effectively rediscovers a portion of
2685 // IVUsers analysis but in program order this time.
2686 if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
2687 continue;
2688
2689 // Remove this instruction from any NearUsers set it may be in.
2690 for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
2691 ChainIdx < NChains; ++ChainIdx) {
2692 ChainUsersVec[ChainIdx].NearUsers.erase(I);
2693 }
2694 // Search for operands that can be chained.
2695 SmallPtrSet<Instruction*, 4> UniqueOperands;
2696 User::op_iterator IVOpEnd = I->op_end();
2697 User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
2698 while (IVOpIter != IVOpEnd) {
2699 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
2700 if (UniqueOperands.insert(IVOpInst))
2701 ChainInstruction(I, IVOpInst, ChainUsersVec);
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00002702 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
Andrew Trick29fe5f02012-01-09 19:50:34 +00002703 }
2704 } // Continue walking down the instructions.
2705 } // Continue walking down the domtree.
2706 // Visit phi backedges to determine if the chain can generate the IV postinc.
2707 for (BasicBlock::iterator I = L->getHeader()->begin();
2708 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
2709 if (!SE.isSCEVable(PN->getType()))
2710 continue;
2711
2712 Instruction *IncV =
2713 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
2714 if (IncV)
2715 ChainInstruction(PN, IncV, ChainUsersVec);
2716 }
Andrew Trick248d4102012-01-09 21:18:52 +00002717 // Remove any unprofitable chains.
2718 unsigned ChainIdx = 0;
2719 for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
2720 UsersIdx < NChains; ++UsersIdx) {
2721 if (!isProfitableChain(IVChainVec[UsersIdx],
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002722 ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
Andrew Trick248d4102012-01-09 21:18:52 +00002723 continue;
2724 // Preserve the chain at UsesIdx.
2725 if (ChainIdx != UsersIdx)
2726 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
2727 FinalizeChain(IVChainVec[ChainIdx]);
2728 ++ChainIdx;
2729 }
2730 IVChainVec.resize(ChainIdx);
2731}
2732
2733void LSRInstance::FinalizeChain(IVChain &Chain) {
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002734 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2735 DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
Andrew Trick248d4102012-01-09 21:18:52 +00002736
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002737 for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
Andrew Trick248d4102012-01-09 21:18:52 +00002738 I != E; ++I) {
2739 DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
2740 User::op_iterator UseI =
2741 std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
2742 assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
2743 IVIncSet.insert(UseI);
2744 }
2745}
2746
2747/// Return true if the IVInc can be folded into an addressing mode.
2748static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002749 Value *Operand, const TargetTransformInfo &TTI) {
Andrew Trick248d4102012-01-09 21:18:52 +00002750 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
2751 if (!IncConst || !isAddressUse(UserInst, Operand))
2752 return false;
2753
2754 if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
2755 return false;
2756
2757 int64_t IncOffset = IncConst->getValue()->getSExtValue();
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002758 if (!isAlwaysFoldable(TTI, LSRUse::Address,
2759 getAccessType(UserInst), /*BaseGV=*/ 0,
2760 IncOffset, /*HaseBaseReg=*/ false))
Andrew Trick248d4102012-01-09 21:18:52 +00002761 return false;
2762
2763 return true;
2764}
2765
2766/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to
2767/// materialize the IV user's operand from the previous IV user's operand.
2768void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
2769 SmallVectorImpl<WeakVH> &DeadInsts) {
2770 // Find the new IVOperand for the head of the chain. It may have been replaced
2771 // by LSR.
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002772 const IVInc &Head = Chain.Incs[0];
Andrew Trick248d4102012-01-09 21:18:52 +00002773 User::op_iterator IVOpEnd = Head.UserInst->op_end();
Andrew Trickf3a25442013-03-19 05:10:27 +00002774 // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
Andrew Trick248d4102012-01-09 21:18:52 +00002775 User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
2776 IVOpEnd, L, SE);
2777 Value *IVSrc = 0;
Andrew Trickf3a25442013-03-19 05:10:27 +00002778 while (IVOpIter != IVOpEnd) {
Andrew Trick248d4102012-01-09 21:18:52 +00002779 IVSrc = getWideOperand(*IVOpIter);
2780
2781 // If this operand computes the expression that the chain needs, we may use
2782 // it. (Check this after setting IVSrc which is used below.)
2783 //
2784 // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
2785 // narrow for the chain, so we can no longer use it. We do allow using a
2786 // wider phi, assuming the LSR checked for free truncation. In that case we
2787 // should already have a truncate on this operand such that
2788 // getSCEV(IVSrc) == IncExpr.
2789 if (SE.getSCEV(*IVOpIter) == Head.IncExpr
2790 || SE.getSCEV(IVSrc) == Head.IncExpr) {
2791 break;
2792 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00002793 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
Andrew Trickf3a25442013-03-19 05:10:27 +00002794 }
Andrew Trick248d4102012-01-09 21:18:52 +00002795 if (IVOpIter == IVOpEnd) {
2796 // Gracefully give up on this chain.
2797 DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
2798 return;
2799 }
2800
2801 DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
2802 Type *IVTy = IVSrc->getType();
2803 Type *IntTy = SE.getEffectiveSCEVType(IVTy);
2804 const SCEV *LeftOverExpr = 0;
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002805 for (IVChain::const_iterator IncI = Chain.begin(),
Andrew Trick248d4102012-01-09 21:18:52 +00002806 IncE = Chain.end(); IncI != IncE; ++IncI) {
2807
2808 Instruction *InsertPt = IncI->UserInst;
2809 if (isa<PHINode>(InsertPt))
2810 InsertPt = L->getLoopLatch()->getTerminator();
2811
2812 // IVOper will replace the current IV User's operand. IVSrc is the IV
2813 // value currently held in a register.
2814 Value *IVOper = IVSrc;
2815 if (!IncI->IncExpr->isZero()) {
2816 // IncExpr was the result of subtraction of two narrow values, so must
2817 // be signed.
2818 const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
2819 LeftOverExpr = LeftOverExpr ?
2820 SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
2821 }
2822 if (LeftOverExpr && !LeftOverExpr->isZero()) {
2823 // Expand the IV increment.
2824 Rewriter.clearPostInc();
2825 Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
2826 const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
2827 SE.getUnknown(IncV));
2828 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
2829
2830 // If an IV increment can't be folded, use it as the next IV value.
2831 if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
Chandler Carruth26c59fa2013-01-07 14:41:08 +00002832 TTI)) {
Andrew Trick248d4102012-01-09 21:18:52 +00002833 assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
2834 IVSrc = IVOper;
2835 LeftOverExpr = 0;
2836 }
2837 }
2838 Type *OperTy = IncI->IVOperand->getType();
2839 if (IVTy != OperTy) {
2840 assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
2841 "cannot extend a chained IV");
2842 IRBuilder<> Builder(InsertPt);
2843 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
2844 }
2845 IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
2846 DeadInsts.push_back(IncI->IVOperand);
2847 }
2848 // If LSR created a new, wider phi, we may also replace its postinc. We only
2849 // do this if we also found a wide value for the head of the chain.
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00002850 if (isa<PHINode>(Chain.tailUserInst())) {
Andrew Trick248d4102012-01-09 21:18:52 +00002851 for (BasicBlock::iterator I = L->getHeader()->begin();
2852 PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
2853 if (!isCompatibleIVType(Phi, IVSrc))
2854 continue;
2855 Instruction *PostIncV = dyn_cast<Instruction>(
2856 Phi->getIncomingValueForBlock(L->getLoopLatch()));
2857 if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
2858 continue;
2859 Value *IVOper = IVSrc;
2860 Type *PostIncTy = PostIncV->getType();
2861 if (IVTy != PostIncTy) {
2862 assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
2863 IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
2864 Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
2865 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
2866 }
2867 Phi->replaceUsesOfWith(PostIncV, IVOper);
2868 DeadInsts.push_back(PostIncV);
2869 }
2870 }
Andrew Trick29fe5f02012-01-09 19:50:34 +00002871}
2872
Dan Gohman45774ce2010-02-12 10:34:29 +00002873void LSRInstance::CollectFixupsAndInitialFormulae() {
2874 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
Andrew Trick248d4102012-01-09 21:18:52 +00002875 Instruction *UserInst = UI->getUser();
2876 // Skip IV users that are part of profitable IV Chains.
2877 User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
2878 UI->getOperandValToReplace());
2879 assert(UseI != UserInst->op_end() && "cannot find IV operand");
2880 if (IVIncSet.count(UseI))
2881 continue;
2882
Dan Gohman45774ce2010-02-12 10:34:29 +00002883 // Record the uses.
2884 LSRFixup &LF = getNewFixup();
Andrew Trick248d4102012-01-09 21:18:52 +00002885 LF.UserInst = UserInst;
Dan Gohman45774ce2010-02-12 10:34:29 +00002886 LF.OperandValToReplace = UI->getOperandValToReplace();
Dan Gohmand006ab92010-04-07 22:27:08 +00002887 LF.PostIncLoops = UI->getPostIncLoops();
Dan Gohman45774ce2010-02-12 10:34:29 +00002888
2889 LSRUse::KindType Kind = LSRUse::Basic;
Chris Lattner229907c2011-07-18 04:54:35 +00002890 Type *AccessTy = 0;
Dan Gohman45774ce2010-02-12 10:34:29 +00002891 if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
2892 Kind = LSRUse::Address;
2893 AccessTy = getAccessType(LF.UserInst);
2894 }
2895
Dan Gohmane637ff52010-04-19 21:48:58 +00002896 const SCEV *S = IU.getExpr(*UI);
Dan Gohman45774ce2010-02-12 10:34:29 +00002897
2898 // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
2899 // (N - i == 0), and this allows (N - i) to be the expression that we work
2900 // with rather than just N or i, so we can consider the register
2901 // requirements for both N and i at the same time. Limiting this code to
2902 // equality icmps is not a problem because all interesting loops use
2903 // equality icmps, thanks to IndVarSimplify.
2904 if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
2905 if (CI->isEquality()) {
2906 // Swap the operands if needed to put the OperandValToReplace on the
2907 // left, for consistency.
2908 Value *NV = CI->getOperand(1);
2909 if (NV == LF.OperandValToReplace) {
2910 CI->setOperand(1, CI->getOperand(0));
2911 CI->setOperand(0, NV);
Dan Gohmanee2fea32010-05-20 19:26:52 +00002912 NV = CI->getOperand(1);
Dan Gohmanfdf98742010-05-20 19:16:03 +00002913 Changed = true;
Dan Gohman45774ce2010-02-12 10:34:29 +00002914 }
2915
2916 // x == y --> x - y == 0
2917 const SCEV *N = SE.getSCEV(NV);
Andrew Trick57243da2013-10-25 21:35:56 +00002918 if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
Dan Gohman3268e4d2011-05-18 21:02:18 +00002919 // S is normalized, so normalize N before folding it into S
2920 // to keep the result normalized.
2921 N = TransformForPostIncUse(Normalize, N, CI, 0,
2922 LF.PostIncLoops, SE, DT);
Dan Gohman45774ce2010-02-12 10:34:29 +00002923 Kind = LSRUse::ICmpZero;
2924 S = SE.getMinusSCEV(N, S);
2925 }
2926
2927 // -1 and the negations of all interesting strides (except the negation
2928 // of -1) are now also interesting.
2929 for (size_t i = 0, e = Factors.size(); i != e; ++i)
2930 if (Factors[i] != -1)
2931 Factors.insert(-(uint64_t)Factors[i]);
2932 Factors.insert(-1);
2933 }
2934
2935 // Set up the initial formula for this use.
2936 std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
2937 LF.LUIdx = P.first;
2938 LF.Offset = P.second;
2939 LSRUse &LU = Uses[LF.LUIdx];
Dan Gohmand006ab92010-04-07 22:27:08 +00002940 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
Dan Gohman14152082010-07-15 20:24:58 +00002941 if (!LU.WidestFixupType ||
2942 SE.getTypeSizeInBits(LU.WidestFixupType) <
2943 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
2944 LU.WidestFixupType = LF.OperandValToReplace->getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00002945
2946 // If this is the first use of this LSRUse, give it a formula.
2947 if (LU.Formulae.empty()) {
Dan Gohman8c16b382010-02-22 04:11:59 +00002948 InsertInitialFormula(S, LU, LF.LUIdx);
Dan Gohman45774ce2010-02-12 10:34:29 +00002949 CountRegisters(LU.Formulae.back(), LF.LUIdx);
2950 }
2951 }
2952
2953 DEBUG(print_fixups(dbgs()));
2954}
2955
Dan Gohmana4ca28a2010-05-20 20:52:00 +00002956/// InsertInitialFormula - Insert a formula for the given expression into
2957/// the given use, separating out loop-variant portions from loop-invariant
2958/// and loop-computable portions.
Dan Gohman45774ce2010-02-12 10:34:29 +00002959void
Dan Gohman8c16b382010-02-22 04:11:59 +00002960LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Andrew Trick57243da2013-10-25 21:35:56 +00002961 // Mark uses whose expressions cannot be expanded.
2962 if (!isSafeToExpand(S, SE))
2963 LU.RigidFormula = true;
2964
Dan Gohman45774ce2010-02-12 10:34:29 +00002965 Formula F;
Dan Gohman20d9ce22010-11-17 21:41:58 +00002966 F.InitialMatch(S, L, SE);
Dan Gohman45774ce2010-02-12 10:34:29 +00002967 bool Inserted = InsertFormula(LU, LUIdx, F);
2968 assert(Inserted && "Initial formula already exists!"); (void)Inserted;
2969}
2970
Dan Gohmana4ca28a2010-05-20 20:52:00 +00002971/// InsertSupplementalFormula - Insert a simple single-register formula for
2972/// the given expression into the given use.
Dan Gohman45774ce2010-02-12 10:34:29 +00002973void
2974LSRInstance::InsertSupplementalFormula(const SCEV *S,
2975 LSRUse &LU, size_t LUIdx) {
2976 Formula F;
2977 F.BaseRegs.push_back(S);
Chandler Carruth7e31c8f2013-01-12 23:46:04 +00002978 F.HasBaseReg = true;
Dan Gohman45774ce2010-02-12 10:34:29 +00002979 bool Inserted = InsertFormula(LU, LUIdx, F);
2980 assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
2981}
2982
2983/// CountRegisters - Note which registers are used by the given formula,
2984/// updating RegUses.
2985void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
2986 if (F.ScaledReg)
2987 RegUses.CountRegister(F.ScaledReg, LUIdx);
2988 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
2989 E = F.BaseRegs.end(); I != E; ++I)
2990 RegUses.CountRegister(*I, LUIdx);
2991}
2992
2993/// InsertFormula - If the given formula has not yet been inserted, add it to
2994/// the list, and return true. Return false otherwise.
2995bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
Dan Gohman8c16b382010-02-22 04:11:59 +00002996 if (!LU.InsertFormula(F))
Dan Gohman45774ce2010-02-12 10:34:29 +00002997 return false;
2998
2999 CountRegisters(F, LUIdx);
3000 return true;
3001}
3002
3003/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
3004/// loop-invariant values which we're tracking. These other uses will pin these
3005/// values in registers, making them less profitable for elimination.
3006/// TODO: This currently misses non-constant addrec step registers.
3007/// TODO: Should this give more weight to users inside the loop?
3008void
3009LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3010 SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
3011 SmallPtrSet<const SCEV *, 8> Inserted;
3012
3013 while (!Worklist.empty()) {
3014 const SCEV *S = Worklist.pop_back_val();
3015
3016 if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Dan Gohmandd41bba2010-06-21 19:47:52 +00003017 Worklist.append(N->op_begin(), N->op_end());
Dan Gohman45774ce2010-02-12 10:34:29 +00003018 else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
3019 Worklist.push_back(C->getOperand());
3020 else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3021 Worklist.push_back(D->getLHS());
3022 Worklist.push_back(D->getRHS());
Chandler Carruthcdf47882014-03-09 03:16:01 +00003023 } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3024 if (!Inserted.insert(US)) continue;
3025 const Value *V = US->getValue();
Dan Gohman67b44032010-06-04 23:16:05 +00003026 if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3027 // Look for instructions defined outside the loop.
Dan Gohman45774ce2010-02-12 10:34:29 +00003028 if (L->contains(Inst)) continue;
Dan Gohman67b44032010-06-04 23:16:05 +00003029 } else if (isa<UndefValue>(V))
3030 // Undef doesn't have a live range, so it doesn't matter.
3031 continue;
Chandler Carruthcdf47882014-03-09 03:16:01 +00003032 for (const Use &U : V->uses()) {
3033 const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
Dan Gohman45774ce2010-02-12 10:34:29 +00003034 // Ignore non-instructions.
3035 if (!UserInst)
Dan Gohman045f8192010-01-22 00:46:49 +00003036 continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00003037 // Ignore instructions in other functions (as can happen with
3038 // Constants).
3039 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
Dan Gohman045f8192010-01-22 00:46:49 +00003040 continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00003041 // Ignore instructions not dominated by the loop.
3042 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3043 UserInst->getParent() :
3044 cast<PHINode>(UserInst)->getIncomingBlock(
Chandler Carruthcdf47882014-03-09 03:16:01 +00003045 PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
Dan Gohman45774ce2010-02-12 10:34:29 +00003046 if (!DT.dominates(L->getHeader(), UseBB))
3047 continue;
3048 // Ignore uses which are part of other SCEV expressions, to avoid
3049 // analyzing them multiple times.
Dan Gohman42ec4eb2010-04-09 19:12:34 +00003050 if (SE.isSCEVable(UserInst->getType())) {
3051 const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3052 // If the user is a no-op, look through to its uses.
3053 if (!isa<SCEVUnknown>(UserS))
3054 continue;
Chandler Carruthcdf47882014-03-09 03:16:01 +00003055 if (UserS == US) {
Dan Gohman42ec4eb2010-04-09 19:12:34 +00003056 Worklist.push_back(
3057 SE.getUnknown(const_cast<Instruction *>(UserInst)));
3058 continue;
3059 }
3060 }
Dan Gohman45774ce2010-02-12 10:34:29 +00003061 // Ignore icmp instructions which are already being analyzed.
3062 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
Chandler Carruthcdf47882014-03-09 03:16:01 +00003063 unsigned OtherIdx = !U.getOperandNo();
Dan Gohman45774ce2010-02-12 10:34:29 +00003064 Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
Dan Gohmanafd6db92010-11-17 21:23:15 +00003065 if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
Dan Gohman45774ce2010-02-12 10:34:29 +00003066 continue;
3067 }
3068
3069 LSRFixup &LF = getNewFixup();
3070 LF.UserInst = const_cast<Instruction *>(UserInst);
Chandler Carruthcdf47882014-03-09 03:16:01 +00003071 LF.OperandValToReplace = U;
Dan Gohman45774ce2010-02-12 10:34:29 +00003072 std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
3073 LF.LUIdx = P.first;
3074 LF.Offset = P.second;
3075 LSRUse &LU = Uses[LF.LUIdx];
Dan Gohmand006ab92010-04-07 22:27:08 +00003076 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
Dan Gohman14152082010-07-15 20:24:58 +00003077 if (!LU.WidestFixupType ||
3078 SE.getTypeSizeInBits(LU.WidestFixupType) <
3079 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3080 LU.WidestFixupType = LF.OperandValToReplace->getType();
Chandler Carruthcdf47882014-03-09 03:16:01 +00003081 InsertSupplementalFormula(US, LU, LF.LUIdx);
Dan Gohman45774ce2010-02-12 10:34:29 +00003082 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3083 break;
3084 }
3085 }
3086 }
3087}
3088
3089/// CollectSubexprs - Split S into subexpressions which can be pulled out into
3090/// separate registers. If C is non-null, multiply each subexpression by C.
Andrew Trickc8037062012-07-17 05:30:37 +00003091///
3092/// Return remainder expression after factoring the subexpressions captured by
3093/// Ops. If Ops is complete, return NULL.
3094static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
3095 SmallVectorImpl<const SCEV *> &Ops,
3096 const Loop *L,
3097 ScalarEvolution &SE,
3098 unsigned Depth = 0) {
3099 // Arbitrarily cap recursion to protect compile time.
3100 if (Depth >= 3)
3101 return S;
3102
Dan Gohman45774ce2010-02-12 10:34:29 +00003103 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3104 // Break out add operands.
3105 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
Andrew Trickc8037062012-07-17 05:30:37 +00003106 I != E; ++I) {
3107 const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
3108 if (Remainder)
3109 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3110 }
Jakub Staszak4898e622013-06-15 12:20:44 +00003111 return 0;
Dan Gohman45774ce2010-02-12 10:34:29 +00003112 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3113 // Split a non-zero base out of an addrec.
Andrew Trickc8037062012-07-17 05:30:37 +00003114 if (AR->getStart()->isZero())
3115 return S;
3116
3117 const SCEV *Remainder = CollectSubexprs(AR->getStart(),
3118 C, Ops, L, SE, Depth+1);
3119 // Split the non-zero AddRec unless it is part of a nested recurrence that
3120 // does not pertain to this loop.
3121 if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3122 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
Jakub Staszak4898e622013-06-15 12:20:44 +00003123 Remainder = 0;
Andrew Trickc8037062012-07-17 05:30:37 +00003124 }
3125 if (Remainder != AR->getStart()) {
3126 if (!Remainder)
3127 Remainder = SE.getConstant(AR->getType(), 0);
3128 return SE.getAddRecExpr(Remainder,
3129 AR->getStepRecurrence(SE),
3130 AR->getLoop(),
3131 //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3132 SCEV::FlagAnyWrap);
Dan Gohman45774ce2010-02-12 10:34:29 +00003133 }
3134 } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
3135 // Break (C * (a + b + c)) into C*a + C*b + C*c.
Andrew Trickc8037062012-07-17 05:30:37 +00003136 if (Mul->getNumOperands() != 2)
3137 return S;
3138 if (const SCEVConstant *Op0 =
3139 dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
3140 C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
3141 const SCEV *Remainder =
3142 CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
3143 if (Remainder)
3144 Ops.push_back(SE.getMulExpr(C, Remainder));
Jakub Staszak4898e622013-06-15 12:20:44 +00003145 return 0;
Andrew Trickc8037062012-07-17 05:30:37 +00003146 }
Dan Gohman45774ce2010-02-12 10:34:29 +00003147 }
Andrew Trickc8037062012-07-17 05:30:37 +00003148 return S;
Dan Gohman45774ce2010-02-12 10:34:29 +00003149}
3150
3151/// GenerateReassociations - Split out subexpressions from adds and the bases of
3152/// addrecs.
3153void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3154 Formula Base,
3155 unsigned Depth) {
3156 // Arbitrarily cap recursion to protect compile time.
3157 if (Depth >= 3) return;
3158
3159 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
3160 const SCEV *BaseReg = Base.BaseRegs[i];
3161
Dan Gohman89fdbaf2010-08-16 15:50:00 +00003162 SmallVector<const SCEV *, 8> AddOps;
Andrew Trickc8037062012-07-17 05:30:37 +00003163 const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
3164 if (Remainder)
3165 AddOps.push_back(Remainder);
Dan Gohmanfb9712b2010-06-25 22:32:18 +00003166
Dan Gohman45774ce2010-02-12 10:34:29 +00003167 if (AddOps.size() == 1) continue;
3168
3169 for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3170 JE = AddOps.end(); J != JE; ++J) {
Dan Gohman89fdbaf2010-08-16 15:50:00 +00003171
3172 // Loop-variant "unknown" values are uninteresting; we won't be able to
3173 // do anything meaningful with them.
Dan Gohmanafd6db92010-11-17 21:23:15 +00003174 if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
Dan Gohman89fdbaf2010-08-16 15:50:00 +00003175 continue;
3176
Dan Gohman45774ce2010-02-12 10:34:29 +00003177 // Don't pull a constant into a register if the constant could be folded
3178 // into an immediate field.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003179 if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3180 LU.AccessTy, *J, Base.getNumRegs() > 1))
Dan Gohman45774ce2010-02-12 10:34:29 +00003181 continue;
3182
3183 // Collect all operands except *J.
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00003184 SmallVector<const SCEV *, 8> InnerAddOps(
3185 ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
3186 InnerAddOps.append(std::next(J),
3187 ((const SmallVector<const SCEV *, 8> &)AddOps).end());
Dan Gohman45774ce2010-02-12 10:34:29 +00003188
3189 // Don't leave just a constant behind in a register if the constant could
3190 // be folded into an immediate field.
3191 if (InnerAddOps.size() == 1 &&
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003192 isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3193 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
Dan Gohman45774ce2010-02-12 10:34:29 +00003194 continue;
3195
Dan Gohman997bbc52010-04-23 01:55:05 +00003196 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3197 if (InnerSum->isZero())
3198 continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00003199 Formula F = Base;
Dan Gohman6136e942011-05-03 00:46:49 +00003200
3201 // Add the remaining pieces of the add back into the new formula.
3202 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003203 if (InnerSumSC &&
Dan Gohman6136e942011-05-03 00:46:49 +00003204 SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003205 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3206 InnerSumSC->getValue()->getZExtValue())) {
Dan Gohman6136e942011-05-03 00:46:49 +00003207 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
3208 InnerSumSC->getValue()->getZExtValue();
3209 F.BaseRegs.erase(F.BaseRegs.begin() + i);
3210 } else
3211 F.BaseRegs[i] = InnerSum;
3212
3213 // Add J as its own register, or an unfolded immediate.
3214 const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003215 if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3216 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3217 SC->getValue()->getZExtValue()))
Dan Gohman6136e942011-05-03 00:46:49 +00003218 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
3219 SC->getValue()->getZExtValue();
3220 else
3221 F.BaseRegs.push_back(*J);
3222
Dan Gohman45774ce2010-02-12 10:34:29 +00003223 if (InsertFormula(LU, LUIdx, F))
3224 // If that formula hadn't been seen before, recurse to find more like
3225 // it.
3226 GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
3227 }
3228 }
3229}
3230
3231/// GenerateCombinations - Generate a formula consisting of all of the
3232/// loop-dominating registers added into a single register.
3233void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Dan Gohmane4e51a62010-02-14 18:51:39 +00003234 Formula Base) {
Dan Gohman8b0a4192010-03-01 17:49:51 +00003235 // This method is only interesting on a plurality of registers.
Dan Gohman45774ce2010-02-12 10:34:29 +00003236 if (Base.BaseRegs.size() <= 1) return;
3237
3238 Formula F = Base;
3239 F.BaseRegs.clear();
3240 SmallVector<const SCEV *, 4> Ops;
3241 for (SmallVectorImpl<const SCEV *>::const_iterator
3242 I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
3243 const SCEV *BaseReg = *I;
Dan Gohman20d9ce22010-11-17 21:41:58 +00003244 if (SE.properlyDominates(BaseReg, L->getHeader()) &&
Dan Gohmanafd6db92010-11-17 21:23:15 +00003245 !SE.hasComputableLoopEvolution(BaseReg, L))
Dan Gohman45774ce2010-02-12 10:34:29 +00003246 Ops.push_back(BaseReg);
3247 else
3248 F.BaseRegs.push_back(BaseReg);
3249 }
3250 if (Ops.size() > 1) {
Dan Gohmanbb7d5222010-02-14 18:50:49 +00003251 const SCEV *Sum = SE.getAddExpr(Ops);
3252 // TODO: If Sum is zero, it probably means ScalarEvolution missed an
3253 // opportunity to fold something. For now, just ignore such cases
Dan Gohman8b0a4192010-03-01 17:49:51 +00003254 // rather than proceed with zero in a register.
Dan Gohmanbb7d5222010-02-14 18:50:49 +00003255 if (!Sum->isZero()) {
3256 F.BaseRegs.push_back(Sum);
3257 (void)InsertFormula(LU, LUIdx, F);
3258 }
Dan Gohman45774ce2010-02-12 10:34:29 +00003259 }
3260}
3261
3262/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
3263void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3264 Formula Base) {
3265 // We can't add a symbolic offset if the address already contains one.
Chandler Carruth6e479322013-01-07 15:04:40 +00003266 if (Base.BaseGV) return;
Dan Gohman45774ce2010-02-12 10:34:29 +00003267
3268 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
3269 const SCEV *G = Base.BaseRegs[i];
3270 GlobalValue *GV = ExtractSymbol(G, SE);
3271 if (G->isZero() || !GV)
3272 continue;
3273 Formula F = Base;
Chandler Carruth6e479322013-01-07 15:04:40 +00003274 F.BaseGV = GV;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003275 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
Dan Gohman45774ce2010-02-12 10:34:29 +00003276 continue;
3277 F.BaseRegs[i] = G;
3278 (void)InsertFormula(LU, LUIdx, F);
3279 }
3280}
3281
3282/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
3283void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3284 Formula Base) {
3285 // TODO: For now, just add the min and max offset, because it usually isn't
3286 // worthwhile looking at everything inbetween.
Dan Gohman4afd4122010-07-15 15:14:45 +00003287 SmallVector<int64_t, 2> Worklist;
Dan Gohman45774ce2010-02-12 10:34:29 +00003288 Worklist.push_back(LU.MinOffset);
3289 if (LU.MaxOffset != LU.MinOffset)
3290 Worklist.push_back(LU.MaxOffset);
3291
3292 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
3293 const SCEV *G = Base.BaseRegs[i];
3294
3295 for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
3296 E = Worklist.end(); I != E; ++I) {
3297 Formula F = Base;
Chandler Carruth6e479322013-01-07 15:04:40 +00003298 F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003299 if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
3300 LU.AccessTy, F)) {
Dan Gohman4afd4122010-07-15 15:14:45 +00003301 // Add the offset to the base register.
Dan Gohman9b7632d2010-08-16 15:39:27 +00003302 const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
Dan Gohman4afd4122010-07-15 15:14:45 +00003303 // If it cancelled out, drop the base register, otherwise update it.
3304 if (NewG->isZero()) {
3305 std::swap(F.BaseRegs[i], F.BaseRegs.back());
3306 F.BaseRegs.pop_back();
3307 } else
3308 F.BaseRegs[i] = NewG;
Dan Gohman45774ce2010-02-12 10:34:29 +00003309
3310 (void)InsertFormula(LU, LUIdx, F);
3311 }
3312 }
3313
3314 int64_t Imm = ExtractImmediate(G, SE);
3315 if (G->isZero() || Imm == 0)
3316 continue;
3317 Formula F = Base;
Chandler Carruth6e479322013-01-07 15:04:40 +00003318 F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003319 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
Dan Gohman45774ce2010-02-12 10:34:29 +00003320 continue;
3321 F.BaseRegs[i] = G;
3322 (void)InsertFormula(LU, LUIdx, F);
3323 }
3324}
3325
3326/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
3327/// the comparison. For example, x == y -> x*c == y*c.
3328void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3329 Formula Base) {
3330 if (LU.Kind != LSRUse::ICmpZero) return;
3331
3332 // Determine the integer type for the base formula.
Chris Lattner229907c2011-07-18 04:54:35 +00003333 Type *IntTy = Base.getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00003334 if (!IntTy) return;
3335 if (SE.getTypeSizeInBits(IntTy) > 64) return;
3336
3337 // Don't do this if there is more than one offset.
3338 if (LU.MinOffset != LU.MaxOffset) return;
3339
Chandler Carruth6e479322013-01-07 15:04:40 +00003340 assert(!Base.BaseGV && "ICmpZero use is not legal!");
Dan Gohman45774ce2010-02-12 10:34:29 +00003341
3342 // Check each interesting stride.
3343 for (SmallSetVector<int64_t, 8>::const_iterator
3344 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
3345 int64_t Factor = *I;
Dan Gohman45774ce2010-02-12 10:34:29 +00003346
3347 // Check that the multiplication doesn't overflow.
Chandler Carruth6e479322013-01-07 15:04:40 +00003348 if (Base.BaseOffset == INT64_MIN && Factor == -1)
Dan Gohman5f10d6c2010-02-17 00:41:53 +00003349 continue;
Chandler Carruth6e479322013-01-07 15:04:40 +00003350 int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3351 if (NewBaseOffset / Factor != Base.BaseOffset)
Dan Gohman45774ce2010-02-12 10:34:29 +00003352 continue;
Andrew Trick429e9ed2014-02-26 16:31:56 +00003353 // If the offset will be truncated at this use, check that it is in bounds.
3354 if (!IntTy->isPointerTy() &&
3355 !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
3356 continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00003357
3358 // Check that multiplying with the use offset doesn't overflow.
3359 int64_t Offset = LU.MinOffset;
Dan Gohman5f10d6c2010-02-17 00:41:53 +00003360 if (Offset == INT64_MIN && Factor == -1)
3361 continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00003362 Offset = (uint64_t)Offset * Factor;
Dan Gohman13ac3b22010-02-17 00:42:19 +00003363 if (Offset / Factor != LU.MinOffset)
Dan Gohman45774ce2010-02-12 10:34:29 +00003364 continue;
Andrew Trick429e9ed2014-02-26 16:31:56 +00003365 // If the offset will be truncated at this use, check that it is in bounds.
3366 if (!IntTy->isPointerTy() &&
3367 !ConstantInt::isValueValidForType(IntTy, Offset))
3368 continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00003369
Dan Gohman963b1c12010-06-24 16:57:52 +00003370 Formula F = Base;
Chandler Carruth6e479322013-01-07 15:04:40 +00003371 F.BaseOffset = NewBaseOffset;
Dan Gohman963b1c12010-06-24 16:57:52 +00003372
Dan Gohman45774ce2010-02-12 10:34:29 +00003373 // Check that this scale is legal.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003374 if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
Dan Gohman45774ce2010-02-12 10:34:29 +00003375 continue;
3376
3377 // Compensate for the use having MinOffset built into it.
Chandler Carruth6e479322013-01-07 15:04:40 +00003378 F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
Dan Gohman45774ce2010-02-12 10:34:29 +00003379
Dan Gohman1d2ded72010-05-03 22:09:21 +00003380 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
Dan Gohman45774ce2010-02-12 10:34:29 +00003381
3382 // Check that multiplying with each base register doesn't overflow.
3383 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
3384 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
Dan Gohman4eebb942010-02-19 19:35:48 +00003385 if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
Dan Gohman45774ce2010-02-12 10:34:29 +00003386 goto next;
3387 }
3388
3389 // Check that multiplying with the scaled register doesn't overflow.
3390 if (F.ScaledReg) {
3391 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
Dan Gohman4eebb942010-02-19 19:35:48 +00003392 if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
Dan Gohman45774ce2010-02-12 10:34:29 +00003393 continue;
3394 }
3395
Dan Gohman6136e942011-05-03 00:46:49 +00003396 // Check that multiplying with the unfolded offset doesn't overflow.
3397 if (F.UnfoldedOffset != 0) {
Dan Gohman6c4a3192011-05-23 21:07:39 +00003398 if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
3399 continue;
Dan Gohman6136e942011-05-03 00:46:49 +00003400 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
3401 if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3402 continue;
Andrew Trick429e9ed2014-02-26 16:31:56 +00003403 // If the offset will be truncated, check that it is in bounds.
3404 if (!IntTy->isPointerTy() &&
3405 !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
3406 continue;
Dan Gohman6136e942011-05-03 00:46:49 +00003407 }
3408
Dan Gohman45774ce2010-02-12 10:34:29 +00003409 // If we make it here and it's legal, add it.
3410 (void)InsertFormula(LU, LUIdx, F);
3411 next:;
3412 }
3413}
3414
3415/// GenerateScales - Generate stride factor reuse formulae by making use of
3416/// scaled-offset address modes, for example.
Dan Gohmanab5fb7f2010-05-20 19:44:23 +00003417void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Dan Gohman45774ce2010-02-12 10:34:29 +00003418 // Determine the integer type for the base formula.
Chris Lattner229907c2011-07-18 04:54:35 +00003419 Type *IntTy = Base.getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00003420 if (!IntTy) return;
3421
3422 // If this Formula already has a scaled register, we can't add another one.
Chandler Carruth6e479322013-01-07 15:04:40 +00003423 if (Base.Scale != 0) return;
Dan Gohman45774ce2010-02-12 10:34:29 +00003424
3425 // Check each interesting stride.
3426 for (SmallSetVector<int64_t, 8>::const_iterator
3427 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
3428 int64_t Factor = *I;
3429
Chandler Carruth6e479322013-01-07 15:04:40 +00003430 Base.Scale = Factor;
3431 Base.HasBaseReg = Base.BaseRegs.size() > 1;
Dan Gohman45774ce2010-02-12 10:34:29 +00003432 // Check whether this scale is going to be legal.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003433 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3434 Base)) {
Dan Gohman45774ce2010-02-12 10:34:29 +00003435 // As a special-case, handle special out-of-loop Basic users specially.
3436 // TODO: Reconsider this special case.
3437 if (LU.Kind == LSRUse::Basic &&
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003438 isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3439 LU.AccessTy, Base) &&
Dan Gohman45774ce2010-02-12 10:34:29 +00003440 LU.AllFixupsOutsideLoop)
3441 LU.Kind = LSRUse::Special;
3442 else
3443 continue;
3444 }
3445 // For an ICmpZero, negating a solitary base register won't lead to
3446 // new solutions.
3447 if (LU.Kind == LSRUse::ICmpZero &&
Chandler Carruth6e479322013-01-07 15:04:40 +00003448 !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
Dan Gohman45774ce2010-02-12 10:34:29 +00003449 continue;
3450 // For each addrec base reg, apply the scale, if possible.
3451 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3452 if (const SCEVAddRecExpr *AR =
3453 dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
Dan Gohman1d2ded72010-05-03 22:09:21 +00003454 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
Dan Gohman45774ce2010-02-12 10:34:29 +00003455 if (FactorS->isZero())
3456 continue;
3457 // Divide out the factor, ignoring high bits, since we'll be
3458 // scaling the value back up in the end.
Dan Gohman4eebb942010-02-19 19:35:48 +00003459 if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
Dan Gohman45774ce2010-02-12 10:34:29 +00003460 // TODO: This could be optimized to avoid all the copying.
3461 Formula F = Base;
3462 F.ScaledReg = Quotient;
Dan Gohman80a96082010-05-20 15:17:54 +00003463 F.DeleteBaseReg(F.BaseRegs[i]);
Dan Gohman45774ce2010-02-12 10:34:29 +00003464 (void)InsertFormula(LU, LUIdx, F);
3465 }
3466 }
3467 }
3468}
3469
3470/// GenerateTruncates - Generate reuse formulae from different IV types.
Dan Gohmanab5fb7f2010-05-20 19:44:23 +00003471void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
Dan Gohman45774ce2010-02-12 10:34:29 +00003472 // Don't bother truncating symbolic values.
Chandler Carruth6e479322013-01-07 15:04:40 +00003473 if (Base.BaseGV) return;
Dan Gohman45774ce2010-02-12 10:34:29 +00003474
3475 // Determine the integer type for the base formula.
Chris Lattner229907c2011-07-18 04:54:35 +00003476 Type *DstTy = Base.getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00003477 if (!DstTy) return;
3478 DstTy = SE.getEffectiveSCEVType(DstTy);
3479
Chris Lattner229907c2011-07-18 04:54:35 +00003480 for (SmallSetVector<Type *, 4>::const_iterator
Dan Gohman45774ce2010-02-12 10:34:29 +00003481 I = Types.begin(), E = Types.end(); I != E; ++I) {
Chris Lattner229907c2011-07-18 04:54:35 +00003482 Type *SrcTy = *I;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003483 if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Dan Gohman45774ce2010-02-12 10:34:29 +00003484 Formula F = Base;
3485
3486 if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
3487 for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
3488 JE = F.BaseRegs.end(); J != JE; ++J)
3489 *J = SE.getAnyExtendExpr(*J, SrcTy);
3490
3491 // TODO: This assumes we've done basic processing on all uses and
3492 // have an idea what the register usage is.
3493 if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3494 continue;
3495
3496 (void)InsertFormula(LU, LUIdx, F);
3497 }
3498 }
3499}
3500
3501namespace {
3502
Dan Gohmane7f74bb2010-02-14 18:51:20 +00003503/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
Dan Gohman45774ce2010-02-12 10:34:29 +00003504/// defer modifications so that the search phase doesn't have to worry about
3505/// the data structures moving underneath it.
3506struct WorkItem {
3507 size_t LUIdx;
3508 int64_t Imm;
3509 const SCEV *OrigReg;
3510
3511 WorkItem(size_t LI, int64_t I, const SCEV *R)
3512 : LUIdx(LI), Imm(I), OrigReg(R) {}
3513
3514 void print(raw_ostream &OS) const;
3515 void dump() const;
3516};
3517
3518}
3519
3520void WorkItem::print(raw_ostream &OS) const {
3521 OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
3522 << " , add offset " << Imm;
3523}
3524
Manman Ren49d684e2012-09-12 05:06:18 +00003525#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Dan Gohman45774ce2010-02-12 10:34:29 +00003526void WorkItem::dump() const {
3527 print(errs()); errs() << '\n';
3528}
Manman Renc3366cc2012-09-06 19:55:56 +00003529#endif
Dan Gohman45774ce2010-02-12 10:34:29 +00003530
3531/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
3532/// distance apart and try to form reuse opportunities between them.
3533void LSRInstance::GenerateCrossUseConstantOffsets() {
3534 // Group the registers by their value without any added constant offset.
3535 typedef std::map<int64_t, const SCEV *> ImmMapTy;
3536 typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
3537 RegMapTy Map;
3538 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
3539 SmallVector<const SCEV *, 8> Sequence;
3540 for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
3541 I != E; ++I) {
3542 const SCEV *Reg = *I;
3543 int64_t Imm = ExtractImmediate(Reg, SE);
3544 std::pair<RegMapTy::iterator, bool> Pair =
3545 Map.insert(std::make_pair(Reg, ImmMapTy()));
3546 if (Pair.second)
3547 Sequence.push_back(Reg);
3548 Pair.first->second.insert(std::make_pair(Imm, *I));
3549 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
3550 }
3551
3552 // Now examine each set of registers with the same base value. Build up
3553 // a list of work to do and do the work in a separate step so that we're
3554 // not adding formulae and register counts while we're searching.
Dan Gohman110ed642010-09-01 01:45:53 +00003555 SmallVector<WorkItem, 32> WorkItems;
3556 SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
Dan Gohman45774ce2010-02-12 10:34:29 +00003557 for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
3558 E = Sequence.end(); I != E; ++I) {
3559 const SCEV *Reg = *I;
3560 const ImmMapTy &Imms = Map.find(Reg)->second;
3561
Dan Gohman363f8472010-02-12 19:20:37 +00003562 // It's not worthwhile looking for reuse if there's only one offset.
3563 if (Imms.size() == 1)
3564 continue;
3565
Dan Gohman45774ce2010-02-12 10:34:29 +00003566 DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
3567 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3568 J != JE; ++J)
3569 dbgs() << ' ' << J->first;
3570 dbgs() << '\n');
3571
3572 // Examine each offset.
3573 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3574 J != JE; ++J) {
3575 const SCEV *OrigReg = J->second;
3576
3577 int64_t JImm = J->first;
3578 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
3579
3580 if (!isa<SCEVConstant>(OrigReg) &&
3581 UsedByIndicesMap[Reg].count() == 1) {
3582 DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
3583 continue;
3584 }
3585
3586 // Conservatively examine offsets between this orig reg a few selected
3587 // other orig regs.
3588 ImmMapTy::const_iterator OtherImms[] = {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00003589 Imms.begin(), std::prev(Imms.end()),
3590 Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
3591 2)
Dan Gohman45774ce2010-02-12 10:34:29 +00003592 };
3593 for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
3594 ImmMapTy::const_iterator M = OtherImms[i];
Dan Gohman363f8472010-02-12 19:20:37 +00003595 if (M == J || M == JE) continue;
Dan Gohman45774ce2010-02-12 10:34:29 +00003596
3597 // Compute the difference between the two.
3598 int64_t Imm = (uint64_t)JImm - M->first;
3599 for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
Dan Gohman110ed642010-09-01 01:45:53 +00003600 LUIdx = UsedByIndices.find_next(LUIdx))
Dan Gohman45774ce2010-02-12 10:34:29 +00003601 // Make a memo of this use, offset, and register tuple.
Dan Gohman110ed642010-09-01 01:45:53 +00003602 if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
3603 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
Evan Cheng85a9f432009-11-12 07:35:05 +00003604 }
3605 }
3606 }
3607
Dan Gohman45774ce2010-02-12 10:34:29 +00003608 Map.clear();
3609 Sequence.clear();
3610 UsedByIndicesMap.clear();
Dan Gohman110ed642010-09-01 01:45:53 +00003611 UniqueItems.clear();
Dan Gohman45774ce2010-02-12 10:34:29 +00003612
3613 // Now iterate through the worklist and add new formulae.
3614 for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
3615 E = WorkItems.end(); I != E; ++I) {
3616 const WorkItem &WI = *I;
3617 size_t LUIdx = WI.LUIdx;
3618 LSRUse &LU = Uses[LUIdx];
3619 int64_t Imm = WI.Imm;
3620 const SCEV *OrigReg = WI.OrigReg;
3621
Chris Lattner229907c2011-07-18 04:54:35 +00003622 Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
Dan Gohman45774ce2010-02-12 10:34:29 +00003623 const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
3624 unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
3625
Dan Gohman8b0a4192010-03-01 17:49:51 +00003626 // TODO: Use a more targeted data structure.
Dan Gohman45774ce2010-02-12 10:34:29 +00003627 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
Dan Gohman86110fa2010-05-20 22:25:20 +00003628 const Formula &F = LU.Formulae[L];
Dan Gohman45774ce2010-02-12 10:34:29 +00003629 // Use the immediate in the scaled register.
3630 if (F.ScaledReg == OrigReg) {
Chandler Carruth6e479322013-01-07 15:04:40 +00003631 int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
Dan Gohman45774ce2010-02-12 10:34:29 +00003632 // Don't create 50 + reg(-50).
3633 if (F.referencesReg(SE.getSCEV(
Chandler Carruth6e479322013-01-07 15:04:40 +00003634 ConstantInt::get(IntTy, -(uint64_t)Offset))))
Dan Gohman45774ce2010-02-12 10:34:29 +00003635 continue;
3636 Formula NewF = F;
Chandler Carruth6e479322013-01-07 15:04:40 +00003637 NewF.BaseOffset = Offset;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003638 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3639 NewF))
Dan Gohman45774ce2010-02-12 10:34:29 +00003640 continue;
3641 NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
3642
3643 // If the new scale is a constant in a register, and adding the constant
3644 // value to the immediate would produce a value closer to zero than the
3645 // immediate itself, then the formula isn't worthwhile.
3646 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
Chris Lattnerb1a15122011-07-15 06:08:15 +00003647 if (C->getValue()->isNegative() !=
Chandler Carruth6e479322013-01-07 15:04:40 +00003648 (NewF.BaseOffset < 0) &&
3649 (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
3650 .ule(abs64(NewF.BaseOffset)))
Dan Gohman45774ce2010-02-12 10:34:29 +00003651 continue;
3652
3653 // OK, looks good.
3654 (void)InsertFormula(LU, LUIdx, NewF);
3655 } else {
3656 // Use the immediate in a base register.
3657 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
3658 const SCEV *BaseReg = F.BaseRegs[N];
3659 if (BaseReg != OrigReg)
3660 continue;
3661 Formula NewF = F;
Chandler Carruth6e479322013-01-07 15:04:40 +00003662 NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
Chandler Carruth26c59fa2013-01-07 14:41:08 +00003663 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
3664 LU.Kind, LU.AccessTy, NewF)) {
3665 if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
Dan Gohman6136e942011-05-03 00:46:49 +00003666 continue;
3667 NewF = F;
3668 NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
3669 }
Dan Gohman45774ce2010-02-12 10:34:29 +00003670 NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
3671
3672 // If the new formula has a constant in a register, and adding the
3673 // constant value to the immediate would produce a value closer to
3674 // zero than the immediate itself, then the formula isn't worthwhile.
3675 for (SmallVectorImpl<const SCEV *>::const_iterator
3676 J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
3677 J != JE; ++J)
3678 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
Chandler Carruth6e479322013-01-07 15:04:40 +00003679 if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
3680 abs64(NewF.BaseOffset)) &&
Dan Gohman50f8f2c2010-05-18 23:48:08 +00003681 (C->getValue()->getValue() +
Chandler Carruth6e479322013-01-07 15:04:40 +00003682 NewF.BaseOffset).countTrailingZeros() >=
Michael J. Spencerdf1ecbd72013-05-24 22:23:49 +00003683 countTrailingZeros<uint64_t>(NewF.BaseOffset))
Dan Gohman45774ce2010-02-12 10:34:29 +00003684 goto skip_formula;
3685
3686 // Ok, looks good.
3687 (void)InsertFormula(LU, LUIdx, NewF);
3688 break;
3689 skip_formula:;
3690 }
3691 }
3692 }
3693 }
Dale Johannesen02cb2bf2009-05-11 17:15:42 +00003694}
3695
Dan Gohman45774ce2010-02-12 10:34:29 +00003696/// GenerateAllReuseFormulae - Generate formulae for each use.
3697void
3698LSRInstance::GenerateAllReuseFormulae() {
Dan Gohman521efe62010-02-16 01:42:53 +00003699 // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
Dan Gohman45774ce2010-02-12 10:34:29 +00003700 // queries are more precise.
3701 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3702 LSRUse &LU = Uses[LUIdx];
3703 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3704 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
3705 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3706 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
3707 }
3708 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3709 LSRUse &LU = Uses[LUIdx];
3710 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3711 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
3712 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3713 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
3714 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3715 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
3716 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3717 GenerateScales(LU, LUIdx, LU.Formulae[i]);
Dan Gohman521efe62010-02-16 01:42:53 +00003718 }
3719 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3720 LSRUse &LU = Uses[LUIdx];
Dan Gohman45774ce2010-02-12 10:34:29 +00003721 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3722 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
3723 }
3724
3725 GenerateCrossUseConstantOffsets();
Dan Gohmanbf673e02010-08-29 15:21:38 +00003726
3727 DEBUG(dbgs() << "\n"
3728 "After generating reuse formulae:\n";
3729 print_uses(dbgs()));
Dan Gohman45774ce2010-02-12 10:34:29 +00003730}
3731
Dan Gohman1b61fd92010-10-07 23:43:09 +00003732/// If there are multiple formulae with the same set of registers used
Dan Gohman45774ce2010-02-12 10:34:29 +00003733/// by other uses, pick the best one and delete the others.
3734void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Dan Gohman5947e162010-10-07 23:52:18 +00003735 DenseSet<const SCEV *> VisitedRegs;
3736 SmallPtrSet<const SCEV *, 16> Regs;
Andrew Trick5df90962011-12-06 03:13:31 +00003737 SmallPtrSet<const SCEV *, 16> LoserRegs;
Dan Gohman45774ce2010-02-12 10:34:29 +00003738#ifndef NDEBUG
Dan Gohman4c4043c2010-05-20 20:05:31 +00003739 bool ChangedFormulae = false;
Dan Gohman45774ce2010-02-12 10:34:29 +00003740#endif
3741
3742 // Collect the best formula for each unique set of shared registers. This
3743 // is reset for each use.
Preston Gurd25c3b6a2013-02-01 20:41:27 +00003744 typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
Dan Gohman45774ce2010-02-12 10:34:29 +00003745 BestFormulaeTy;
3746 BestFormulaeTy BestFormulae;
3747
3748 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3749 LSRUse &LU = Uses[LUIdx];
Dan Gohmanab5fb7f2010-05-20 19:44:23 +00003750 DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
Dan Gohman45774ce2010-02-12 10:34:29 +00003751
Dan Gohman4cf99b52010-05-18 23:42:37 +00003752 bool Any = false;
Dan Gohman45774ce2010-02-12 10:34:29 +00003753 for (size_t FIdx = 0, NumForms = LU.Formulae.size();
3754 FIdx != NumForms; ++FIdx) {
3755 Formula &F = LU.Formulae[FIdx];
3756
Andrew Trick5df90962011-12-06 03:13:31 +00003757 // Some formulas are instant losers. For example, they may depend on
3758 // nonexistent AddRecs from other loops. These need to be filtered
3759 // immediately, otherwise heuristics could choose them over others leading
3760 // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
3761 // avoids the need to recompute this information across formulae using the
3762 // same bad AddRec. Passing LoserRegs is also essential unless we remove
3763 // the corresponding bad register from the Regs set.
3764 Cost CostF;
3765 Regs.clear();
Quentin Colombet8aa7abe2013-05-31 17:20:29 +00003766 CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
Andrew Trick5df90962011-12-06 03:13:31 +00003767 &LoserRegs);
3768 if (CostF.isLoser()) {
3769 // During initial formula generation, undesirable formulae are generated
3770 // by uses within other loops that have some non-trivial address mode or
3771 // use the postinc form of the IV. LSR needs to provide these formulae
3772 // as the basis of rediscovering the desired formula that uses an AddRec
3773 // corresponding to the existing phi. Once all formulae have been
3774 // generated, these initial losers may be pruned.
3775 DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
3776 dbgs() << "\n");
Dan Gohman45774ce2010-02-12 10:34:29 +00003777 }
Andrew Trick5df90962011-12-06 03:13:31 +00003778 else {
Preston Gurd25c3b6a2013-02-01 20:41:27 +00003779 SmallVector<const SCEV *, 4> Key;
Andrew Trick5df90962011-12-06 03:13:31 +00003780 for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
3781 JE = F.BaseRegs.end(); J != JE; ++J) {
3782 const SCEV *Reg = *J;
3783 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
3784 Key.push_back(Reg);
3785 }
3786 if (F.ScaledReg &&
3787 RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
3788 Key.push_back(F.ScaledReg);
3789 // Unstable sort by host order ok, because this is only used for
3790 // uniquifying.
3791 std::sort(Key.begin(), Key.end());
Dan Gohman45774ce2010-02-12 10:34:29 +00003792
Andrew Trick5df90962011-12-06 03:13:31 +00003793 std::pair<BestFormulaeTy::const_iterator, bool> P =
3794 BestFormulae.insert(std::make_pair(Key, FIdx));
3795 if (P.second)
3796 continue;
3797
Dan Gohman45774ce2010-02-12 10:34:29 +00003798 Formula &Best = LU.Formulae[P.first->second];
Dan Gohman5947e162010-10-07 23:52:18 +00003799
Dan Gohman5947e162010-10-07 23:52:18 +00003800 Cost CostBest;
Dan Gohman5947e162010-10-07 23:52:18 +00003801 Regs.clear();
Quentin Colombet8aa7abe2013-05-31 17:20:29 +00003802 CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
3803 DT, LU);
Dan Gohman5947e162010-10-07 23:52:18 +00003804 if (CostF < CostBest)
Dan Gohman45774ce2010-02-12 10:34:29 +00003805 std::swap(F, Best);
Dan Gohman8aca7ef2010-05-18 22:37:37 +00003806 DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
Dan Gohman45774ce2010-02-12 10:34:29 +00003807 dbgs() << "\n"
Dan Gohman8aca7ef2010-05-18 22:37:37 +00003808 " in favor of formula "; Best.print(dbgs());
Dan Gohman45774ce2010-02-12 10:34:29 +00003809 dbgs() << '\n');
Dan Gohman45774ce2010-02-12 10:34:29 +00003810 }
Andrew Trick5df90962011-12-06 03:13:31 +00003811#ifndef NDEBUG
3812 ChangedFormulae = true;
3813#endif
3814 LU.DeleteFormula(F);
3815 --FIdx;
3816 --NumForms;
3817 Any = true;
Dan Gohmand0800242010-05-07 23:36:59 +00003818 }
3819
Dan Gohmanbeebef42010-05-18 23:55:57 +00003820 // Now that we've filtered out some formulae, recompute the Regs set.
Dan Gohman4cf99b52010-05-18 23:42:37 +00003821 if (Any)
3822 LU.RecomputeRegs(LUIdx, RegUses);
Dan Gohmand0800242010-05-07 23:36:59 +00003823
3824 // Reset this to prepare for the next use.
Dan Gohman45774ce2010-02-12 10:34:29 +00003825 BestFormulae.clear();
3826 }
3827
Dan Gohman4c4043c2010-05-20 20:05:31 +00003828 DEBUG(if (ChangedFormulae) {
Dan Gohman5b18f032010-02-13 02:06:02 +00003829 dbgs() << "\n"
3830 "After filtering out undesirable candidates:\n";
Dan Gohman45774ce2010-02-12 10:34:29 +00003831 print_uses(dbgs());
3832 });
3833}
3834
Dan Gohmana4eca052010-05-18 22:51:59 +00003835// This is a rough guess that seems to work fairly well.
3836static const size_t ComplexityLimit = UINT16_MAX;
3837
3838/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
3839/// solutions the solver might have to consider. It almost never considers
3840/// this many solutions because it prune the search space, but the pruning
3841/// isn't always sufficient.
3842size_t LSRInstance::EstimateSearchSpaceComplexity() const {
Dan Gohman49d638b2010-10-07 23:37:58 +00003843 size_t Power = 1;
Dan Gohmana4eca052010-05-18 22:51:59 +00003844 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
3845 E = Uses.end(); I != E; ++I) {
3846 size_t FSize = I->Formulae.size();
3847 if (FSize >= ComplexityLimit) {
3848 Power = ComplexityLimit;
3849 break;
3850 }
3851 Power *= FSize;
3852 if (Power >= ComplexityLimit)
3853 break;
3854 }
3855 return Power;
3856}
3857
Dan Gohmane9e08732010-08-29 16:09:42 +00003858/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
3859/// of the registers of another formula, it won't help reduce register
3860/// pressure (though it may not necessarily hurt register pressure); remove
3861/// it to simplify the system.
3862void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
Dan Gohman20fab452010-05-19 23:43:12 +00003863 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
3864 DEBUG(dbgs() << "The search space is too complex.\n");
3865
3866 DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
3867 "which use a superset of registers used by other "
3868 "formulae.\n");
3869
3870 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3871 LSRUse &LU = Uses[LUIdx];
3872 bool Any = false;
3873 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
3874 Formula &F = LU.Formulae[i];
Dan Gohman8ec018c2010-05-20 20:00:41 +00003875 // Look for a formula with a constant or GV in a register. If the use
3876 // also has a formula with that same value in an immediate field,
3877 // delete the one that uses a register.
Dan Gohman20fab452010-05-19 23:43:12 +00003878 for (SmallVectorImpl<const SCEV *>::const_iterator
3879 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
3880 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
3881 Formula NewF = F;
Chandler Carruth6e479322013-01-07 15:04:40 +00003882 NewF.BaseOffset += C->getValue()->getSExtValue();
Dan Gohman20fab452010-05-19 23:43:12 +00003883 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
3884 (I - F.BaseRegs.begin()));
3885 if (LU.HasFormulaWithSameRegs(NewF)) {
3886 DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
3887 LU.DeleteFormula(F);
3888 --i;
3889 --e;
3890 Any = true;
3891 break;
3892 }
3893 } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
3894 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
Chandler Carruth6e479322013-01-07 15:04:40 +00003895 if (!F.BaseGV) {
Dan Gohman20fab452010-05-19 23:43:12 +00003896 Formula NewF = F;
Chandler Carruth6e479322013-01-07 15:04:40 +00003897 NewF.BaseGV = GV;
Dan Gohman20fab452010-05-19 23:43:12 +00003898 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
3899 (I - F.BaseRegs.begin()));
3900 if (LU.HasFormulaWithSameRegs(NewF)) {
3901 DEBUG(dbgs() << " Deleting "; F.print(dbgs());
3902 dbgs() << '\n');
3903 LU.DeleteFormula(F);
3904 --i;
3905 --e;
3906 Any = true;
3907 break;
3908 }
3909 }
3910 }
3911 }
3912 }
3913 if (Any)
3914 LU.RecomputeRegs(LUIdx, RegUses);
3915 }
3916
3917 DEBUG(dbgs() << "After pre-selection:\n";
3918 print_uses(dbgs()));
3919 }
Dan Gohmane9e08732010-08-29 16:09:42 +00003920}
Dan Gohman20fab452010-05-19 23:43:12 +00003921
Dan Gohmane9e08732010-08-29 16:09:42 +00003922/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
3923/// for expressions like A, A+1, A+2, etc., allocate a single register for
3924/// them.
3925void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
Jakub Staszak11bd8352013-02-16 16:08:15 +00003926 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
3927 return;
Dan Gohman20fab452010-05-19 23:43:12 +00003928
Jakub Staszak11bd8352013-02-16 16:08:15 +00003929 DEBUG(dbgs() << "The search space is too complex.\n"
3930 "Narrowing the search space by assuming that uses separated "
3931 "by a constant offset will use the same registers.\n");
Dan Gohman20fab452010-05-19 23:43:12 +00003932
Jakub Staszak11bd8352013-02-16 16:08:15 +00003933 // This is especially useful for unrolled loops.
Dan Gohman8ec018c2010-05-20 20:00:41 +00003934
Jakub Staszak11bd8352013-02-16 16:08:15 +00003935 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3936 LSRUse &LU = Uses[LUIdx];
3937 for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
3938 E = LU.Formulae.end(); I != E; ++I) {
3939 const Formula &F = *I;
3940 if (F.BaseOffset == 0 || F.Scale != 0)
3941 continue;
Dan Gohman20fab452010-05-19 23:43:12 +00003942
Jakub Staszak11bd8352013-02-16 16:08:15 +00003943 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
3944 if (!LUThatHas)
3945 continue;
Dan Gohman20fab452010-05-19 23:43:12 +00003946
Jakub Staszak11bd8352013-02-16 16:08:15 +00003947 if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
3948 LU.Kind, LU.AccessTy))
3949 continue;
Dan Gohman110ed642010-09-01 01:45:53 +00003950
Jakub Staszak11bd8352013-02-16 16:08:15 +00003951 DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
Dan Gohman2fd85d72010-10-08 19:33:26 +00003952
Jakub Staszak11bd8352013-02-16 16:08:15 +00003953 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
3954
3955 // Update the relocs to reference the new use.
3956 for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
3957 E = Fixups.end(); I != E; ++I) {
3958 LSRFixup &Fixup = *I;
3959 if (Fixup.LUIdx == LUIdx) {
3960 Fixup.LUIdx = LUThatHas - &Uses.front();
3961 Fixup.Offset += F.BaseOffset;
3962 // Add the new offset to LUThatHas' offset list.
3963 if (LUThatHas->Offsets.back() != Fixup.Offset) {
3964 LUThatHas->Offsets.push_back(Fixup.Offset);
3965 if (Fixup.Offset > LUThatHas->MaxOffset)
3966 LUThatHas->MaxOffset = Fixup.Offset;
3967 if (Fixup.Offset < LUThatHas->MinOffset)
3968 LUThatHas->MinOffset = Fixup.Offset;
Dan Gohman20fab452010-05-19 23:43:12 +00003969 }
Jakub Staszak11bd8352013-02-16 16:08:15 +00003970 DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
3971 }
3972 if (Fixup.LUIdx == NumUses-1)
3973 Fixup.LUIdx = LUIdx;
3974 }
3975
3976 // Delete formulae from the new use which are no longer legal.
3977 bool Any = false;
3978 for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
3979 Formula &F = LUThatHas->Formulae[i];
3980 if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
3981 LUThatHas->Kind, LUThatHas->AccessTy, F)) {
3982 DEBUG(dbgs() << " Deleting "; F.print(dbgs());
3983 dbgs() << '\n');
3984 LUThatHas->DeleteFormula(F);
3985 --i;
3986 --e;
3987 Any = true;
Dan Gohman20fab452010-05-19 23:43:12 +00003988 }
3989 }
Dan Gohman20fab452010-05-19 23:43:12 +00003990
Jakub Staszak11bd8352013-02-16 16:08:15 +00003991 if (Any)
3992 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
3993
3994 // Delete the old use.
3995 DeleteUse(LU, LUIdx);
3996 --LUIdx;
3997 --NumUses;
3998 break;
3999 }
Dan Gohman20fab452010-05-19 23:43:12 +00004000 }
Jakub Staszak11bd8352013-02-16 16:08:15 +00004001
4002 DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
Dan Gohmane9e08732010-08-29 16:09:42 +00004003}
Dan Gohman20fab452010-05-19 23:43:12 +00004004
Andrew Trick8b55b732011-03-14 16:50:06 +00004005/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
Dan Gohman002ff892010-08-29 16:39:22 +00004006/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4007/// we've done more filtering, as it may be able to find more formulae to
4008/// eliminate.
4009void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4010 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4011 DEBUG(dbgs() << "The search space is too complex.\n");
4012
4013 DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4014 "undesirable dedicated registers.\n");
4015
4016 FilterOutUndesirableDedicatedRegisters();
4017
4018 DEBUG(dbgs() << "After pre-selection:\n";
4019 print_uses(dbgs()));
4020 }
4021}
4022
Dan Gohmane9e08732010-08-29 16:09:42 +00004023/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
4024/// to be profitable, and then in any use which has any reference to that
4025/// register, delete all formulae which do not reference that register.
4026void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
Dan Gohmana4ca28a2010-05-20 20:52:00 +00004027 // With all other options exhausted, loop until the system is simple
4028 // enough to handle.
Dan Gohman45774ce2010-02-12 10:34:29 +00004029 SmallPtrSet<const SCEV *, 4> Taken;
Dan Gohmana4eca052010-05-18 22:51:59 +00004030 while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
Dan Gohman45774ce2010-02-12 10:34:29 +00004031 // Ok, we have too many of formulae on our hands to conveniently handle.
4032 // Use a rough heuristic to thin out the list.
Dan Gohman63e90152010-05-18 22:41:32 +00004033 DEBUG(dbgs() << "The search space is too complex.\n");
Dan Gohman45774ce2010-02-12 10:34:29 +00004034
4035 // Pick the register which is used by the most LSRUses, which is likely
4036 // to be a good reuse register candidate.
4037 const SCEV *Best = 0;
4038 unsigned BestNum = 0;
4039 for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
4040 I != E; ++I) {
4041 const SCEV *Reg = *I;
4042 if (Taken.count(Reg))
4043 continue;
4044 if (!Best)
4045 Best = Reg;
4046 else {
4047 unsigned Count = RegUses.getUsedByIndices(Reg).count();
4048 if (Count > BestNum) {
4049 Best = Reg;
4050 BestNum = Count;
4051 }
4052 }
4053 }
4054
4055 DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
Dan Gohman8b0a4192010-03-01 17:49:51 +00004056 << " will yield profitable reuse.\n");
Dan Gohman45774ce2010-02-12 10:34:29 +00004057 Taken.insert(Best);
4058
4059 // In any use with formulae which references this register, delete formulae
4060 // which don't reference it.
Dan Gohman4cf99b52010-05-18 23:42:37 +00004061 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4062 LSRUse &LU = Uses[LUIdx];
Dan Gohman45774ce2010-02-12 10:34:29 +00004063 if (!LU.Regs.count(Best)) continue;
4064
Dan Gohman4cf99b52010-05-18 23:42:37 +00004065 bool Any = false;
Dan Gohman45774ce2010-02-12 10:34:29 +00004066 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4067 Formula &F = LU.Formulae[i];
4068 if (!F.referencesReg(Best)) {
4069 DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
Dan Gohmanf1c7b1b2010-05-18 22:39:15 +00004070 LU.DeleteFormula(F);
Dan Gohman45774ce2010-02-12 10:34:29 +00004071 --e;
4072 --i;
Dan Gohman4cf99b52010-05-18 23:42:37 +00004073 Any = true;
Dan Gohmand0800242010-05-07 23:36:59 +00004074 assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
Dan Gohman45774ce2010-02-12 10:34:29 +00004075 continue;
4076 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004077 }
Dan Gohman4cf99b52010-05-18 23:42:37 +00004078
4079 if (Any)
4080 LU.RecomputeRegs(LUIdx, RegUses);
Dan Gohman45774ce2010-02-12 10:34:29 +00004081 }
4082
4083 DEBUG(dbgs() << "After pre-selection:\n";
4084 print_uses(dbgs()));
4085 }
4086}
4087
Dan Gohmane9e08732010-08-29 16:09:42 +00004088/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
4089/// formulae to choose from, use some rough heuristics to prune down the number
4090/// of formulae. This keeps the main solver from taking an extraordinary amount
4091/// of time in some worst-case scenarios.
4092void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4093 NarrowSearchSpaceByDetectingSupersets();
4094 NarrowSearchSpaceByCollapsingUnrolledCode();
Dan Gohman002ff892010-08-29 16:39:22 +00004095 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
Dan Gohmane9e08732010-08-29 16:09:42 +00004096 NarrowSearchSpaceByPickingWinnerRegs();
4097}
4098
Dan Gohman45774ce2010-02-12 10:34:29 +00004099/// SolveRecurse - This is the recursive solver.
4100void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
4101 Cost &SolutionCost,
4102 SmallVectorImpl<const Formula *> &Workspace,
4103 const Cost &CurCost,
4104 const SmallPtrSet<const SCEV *, 16> &CurRegs,
4105 DenseSet<const SCEV *> &VisitedRegs) const {
4106 // Some ideas:
4107 // - prune more:
4108 // - use more aggressive filtering
4109 // - sort the formula so that the most profitable solutions are found first
4110 // - sort the uses too
4111 // - search faster:
Dan Gohman8b0a4192010-03-01 17:49:51 +00004112 // - don't compute a cost, and then compare. compare while computing a cost
Dan Gohman45774ce2010-02-12 10:34:29 +00004113 // and bail early.
4114 // - track register sets with SmallBitVector
4115
4116 const LSRUse &LU = Uses[Workspace.size()];
4117
4118 // If this use references any register that's already a part of the
4119 // in-progress solution, consider it a requirement that a formula must
4120 // reference that register in order to be considered. This prunes out
4121 // unprofitable searching.
4122 SmallSetVector<const SCEV *, 4> ReqRegs;
4123 for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
4124 E = CurRegs.end(); I != E; ++I)
Dan Gohman5b18f032010-02-13 02:06:02 +00004125 if (LU.Regs.count(*I))
Dan Gohman45774ce2010-02-12 10:34:29 +00004126 ReqRegs.insert(*I);
Dan Gohman45774ce2010-02-12 10:34:29 +00004127
4128 SmallPtrSet<const SCEV *, 16> NewRegs;
4129 Cost NewCost;
4130 for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
4131 E = LU.Formulae.end(); I != E; ++I) {
4132 const Formula &F = *I;
4133
4134 // Ignore formulae which do not use any of the required registers.
Andrew Tricke3502cb2012-03-22 22:42:51 +00004135 bool SatisfiedReqReg = true;
Dan Gohman45774ce2010-02-12 10:34:29 +00004136 for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
4137 JE = ReqRegs.end(); J != JE; ++J) {
4138 const SCEV *Reg = *J;
4139 if ((!F.ScaledReg || F.ScaledReg != Reg) &&
4140 std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
Andrew Tricke3502cb2012-03-22 22:42:51 +00004141 F.BaseRegs.end()) {
4142 SatisfiedReqReg = false;
4143 break;
4144 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004145 }
Andrew Tricke3502cb2012-03-22 22:42:51 +00004146 if (!SatisfiedReqReg) {
4147 // If none of the formulae satisfied the required registers, then we could
4148 // clear ReqRegs and try again. Currently, we simply give up in this case.
4149 continue;
4150 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004151
4152 // Evaluate the cost of the current formula. If it's already worse than
4153 // the current best, prune the search at that point.
4154 NewCost = CurCost;
4155 NewRegs = CurRegs;
Quentin Colombet8aa7abe2013-05-31 17:20:29 +00004156 NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
4157 LU);
Dan Gohman45774ce2010-02-12 10:34:29 +00004158 if (NewCost < SolutionCost) {
4159 Workspace.push_back(&F);
4160 if (Workspace.size() != Uses.size()) {
4161 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
4162 NewRegs, VisitedRegs);
4163 if (F.getNumRegs() == 1 && Workspace.size() == 1)
4164 VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
4165 } else {
4166 DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
Andrew Trick4dc3eff2012-01-09 18:58:16 +00004167 dbgs() << ".\n Regs:";
Dan Gohman45774ce2010-02-12 10:34:29 +00004168 for (SmallPtrSet<const SCEV *, 16>::const_iterator
4169 I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
4170 dbgs() << ' ' << **I;
4171 dbgs() << '\n');
4172
4173 SolutionCost = NewCost;
4174 Solution = Workspace;
4175 }
4176 Workspace.pop_back();
4177 }
Dan Gohman5b18f032010-02-13 02:06:02 +00004178 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004179}
4180
Dan Gohmana4ca28a2010-05-20 20:52:00 +00004181/// Solve - Choose one formula from each use. Return the results in the given
4182/// Solution vector.
Dan Gohman45774ce2010-02-12 10:34:29 +00004183void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4184 SmallVector<const Formula *, 8> Workspace;
4185 Cost SolutionCost;
Tim Northoverbc6659c2014-01-22 13:27:00 +00004186 SolutionCost.Lose();
Dan Gohman45774ce2010-02-12 10:34:29 +00004187 Cost CurCost;
4188 SmallPtrSet<const SCEV *, 16> CurRegs;
4189 DenseSet<const SCEV *> VisitedRegs;
4190 Workspace.reserve(Uses.size());
4191
Dan Gohman8ec018c2010-05-20 20:00:41 +00004192 // SolveRecurse does all the work.
Dan Gohman45774ce2010-02-12 10:34:29 +00004193 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4194 CurRegs, VisitedRegs);
Andrew Trick58124392011-09-27 00:44:14 +00004195 if (Solution.empty()) {
4196 DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
4197 return;
4198 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004199
4200 // Ok, we've now made all our decisions.
4201 DEBUG(dbgs() << "\n"
4202 "The chosen solution requires "; SolutionCost.print(dbgs());
4203 dbgs() << ":\n";
4204 for (size_t i = 0, e = Uses.size(); i != e; ++i) {
4205 dbgs() << " ";
4206 Uses[i].print(dbgs());
4207 dbgs() << "\n"
4208 " ";
4209 Solution[i]->print(dbgs());
4210 dbgs() << '\n';
4211 });
Dan Gohman6295f2e2010-05-20 20:59:23 +00004212
4213 assert(Solution.size() == Uses.size() && "Malformed solution!");
Dan Gohman45774ce2010-02-12 10:34:29 +00004214}
4215
Dan Gohman607e02b2010-04-09 22:07:05 +00004216/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
4217/// the dominator tree far as we can go while still being dominated by the
4218/// input positions. This helps canonicalize the insert position, which
4219/// encourages sharing.
4220BasicBlock::iterator
4221LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4222 const SmallVectorImpl<Instruction *> &Inputs)
4223 const {
4224 for (;;) {
4225 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4226 unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4227
4228 BasicBlock *IDom;
Dan Gohman8ce95cc2010-05-20 20:00:25 +00004229 for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
Dan Gohman9b48b852010-05-20 22:46:54 +00004230 if (!Rung) return IP;
Dan Gohman8ce95cc2010-05-20 20:00:25 +00004231 Rung = Rung->getIDom();
4232 if (!Rung) return IP;
4233 IDom = Rung->getBlock();
Dan Gohman607e02b2010-04-09 22:07:05 +00004234
4235 // Don't climb into a loop though.
4236 const Loop *IDomLoop = LI.getLoopFor(IDom);
4237 unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4238 if (IDomDepth <= IPLoopDepth &&
4239 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4240 break;
4241 }
4242
4243 bool AllDominate = true;
4244 Instruction *BetterPos = 0;
4245 Instruction *Tentative = IDom->getTerminator();
4246 for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
4247 E = Inputs.end(); I != E; ++I) {
4248 Instruction *Inst = *I;
4249 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
4250 AllDominate = false;
4251 break;
4252 }
4253 // Attempt to find an insert position in the middle of the block,
4254 // instead of at the end, so that it can be used for other expansions.
4255 if (IDom == Inst->getParent() &&
Rafael Espindoladd489312012-04-30 03:53:06 +00004256 (!BetterPos || !DT.dominates(Inst, BetterPos)))
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00004257 BetterPos = std::next(BasicBlock::iterator(Inst));
Dan Gohman607e02b2010-04-09 22:07:05 +00004258 }
4259 if (!AllDominate)
4260 break;
4261 if (BetterPos)
4262 IP = BetterPos;
4263 else
4264 IP = Tentative;
4265 }
4266
4267 return IP;
4268}
4269
4270/// AdjustInsertPositionForExpand - Determine an input position which will be
Dan Gohmand2df6432010-04-09 02:00:38 +00004271/// dominated by the operands and which will dominate the result.
4272BasicBlock::iterator
Andrew Trickc908b432012-01-20 07:41:13 +00004273LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
Dan Gohman607e02b2010-04-09 22:07:05 +00004274 const LSRFixup &LF,
Andrew Trickc908b432012-01-20 07:41:13 +00004275 const LSRUse &LU,
4276 SCEVExpander &Rewriter) const {
Dan Gohmand2df6432010-04-09 02:00:38 +00004277 // Collect some instructions which must be dominated by the
Dan Gohmand006ab92010-04-07 22:27:08 +00004278 // expanding replacement. These must be dominated by any operands that
Dan Gohman45774ce2010-02-12 10:34:29 +00004279 // will be required in the expansion.
4280 SmallVector<Instruction *, 4> Inputs;
4281 if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4282 Inputs.push_back(I);
4283 if (LU.Kind == LSRUse::ICmpZero)
4284 if (Instruction *I =
4285 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4286 Inputs.push_back(I);
Dan Gohmand006ab92010-04-07 22:27:08 +00004287 if (LF.PostIncLoops.count(L)) {
4288 if (LF.isUseFullyOutsideLoop(L))
Dan Gohman52f55632010-03-02 01:59:21 +00004289 Inputs.push_back(L->getLoopLatch()->getTerminator());
4290 else
4291 Inputs.push_back(IVIncInsertPos);
4292 }
Dan Gohman45065392010-04-08 05:57:57 +00004293 // The expansion must also be dominated by the increment positions of any
4294 // loops it for which it is using post-inc mode.
4295 for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
4296 E = LF.PostIncLoops.end(); I != E; ++I) {
4297 const Loop *PIL = *I;
4298 if (PIL == L) continue;
4299
Dan Gohman607e02b2010-04-09 22:07:05 +00004300 // Be dominated by the loop exit.
Dan Gohman45065392010-04-08 05:57:57 +00004301 SmallVector<BasicBlock *, 4> ExitingBlocks;
4302 PIL->getExitingBlocks(ExitingBlocks);
4303 if (!ExitingBlocks.empty()) {
4304 BasicBlock *BB = ExitingBlocks[0];
4305 for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
4306 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
4307 Inputs.push_back(BB->getTerminator());
4308 }
4309 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004310
Andrew Trickc908b432012-01-20 07:41:13 +00004311 assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
4312 && !isa<DbgInfoIntrinsic>(LowestIP) &&
4313 "Insertion point must be a normal instruction");
4314
Dan Gohman45774ce2010-02-12 10:34:29 +00004315 // Then, climb up the immediate dominator tree as far as we can go while
4316 // still being dominated by the input positions.
Andrew Trickc908b432012-01-20 07:41:13 +00004317 BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
Dan Gohmand2df6432010-04-09 02:00:38 +00004318
4319 // Don't insert instructions before PHI nodes.
Dan Gohman45774ce2010-02-12 10:34:29 +00004320 while (isa<PHINode>(IP)) ++IP;
Dan Gohmand2df6432010-04-09 02:00:38 +00004321
Bill Wendling86c5cbe2011-08-24 21:06:46 +00004322 // Ignore landingpad instructions.
4323 while (isa<LandingPadInst>(IP)) ++IP;
4324
Dan Gohmand2df6432010-04-09 02:00:38 +00004325 // Ignore debug intrinsics.
Dan Gohmand42e09d2010-03-26 00:33:27 +00004326 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
Dan Gohman45774ce2010-02-12 10:34:29 +00004327
Andrew Trickc908b432012-01-20 07:41:13 +00004328 // Set IP below instructions recently inserted by SCEVExpander. This keeps the
4329 // IP consistent across expansions and allows the previously inserted
4330 // instructions to be reused by subsequent expansion.
4331 while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
4332
Dan Gohmand2df6432010-04-09 02:00:38 +00004333 return IP;
4334}
4335
Dan Gohmana4ca28a2010-05-20 20:52:00 +00004336/// Expand - Emit instructions for the leading candidate expression for this
4337/// LSRUse (this is called "expanding").
Dan Gohmand2df6432010-04-09 02:00:38 +00004338Value *LSRInstance::Expand(const LSRFixup &LF,
4339 const Formula &F,
4340 BasicBlock::iterator IP,
4341 SCEVExpander &Rewriter,
4342 SmallVectorImpl<WeakVH> &DeadInsts) const {
4343 const LSRUse &LU = Uses[LF.LUIdx];
Andrew Trick57243da2013-10-25 21:35:56 +00004344 if (LU.RigidFormula)
4345 return LF.OperandValToReplace;
Dan Gohmand2df6432010-04-09 02:00:38 +00004346
4347 // Determine an input position which will be dominated by the operands and
4348 // which will dominate the result.
Andrew Trickc908b432012-01-20 07:41:13 +00004349 IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
Dan Gohmand2df6432010-04-09 02:00:38 +00004350
Dan Gohman45774ce2010-02-12 10:34:29 +00004351 // Inform the Rewriter if we have a post-increment use, so that it can
4352 // perform an advantageous expansion.
Dan Gohmand006ab92010-04-07 22:27:08 +00004353 Rewriter.setPostInc(LF.PostIncLoops);
Dan Gohman45774ce2010-02-12 10:34:29 +00004354
4355 // This is the type that the user actually needs.
Chris Lattner229907c2011-07-18 04:54:35 +00004356 Type *OpTy = LF.OperandValToReplace->getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00004357 // This will be the type that we'll initially expand to.
Chris Lattner229907c2011-07-18 04:54:35 +00004358 Type *Ty = F.getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00004359 if (!Ty)
4360 // No type known; just expand directly to the ultimate type.
4361 Ty = OpTy;
4362 else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
4363 // Expand directly to the ultimate type if it's the right size.
4364 Ty = OpTy;
4365 // This is the type to do integer arithmetic in.
Chris Lattner229907c2011-07-18 04:54:35 +00004366 Type *IntTy = SE.getEffectiveSCEVType(Ty);
Dan Gohman45774ce2010-02-12 10:34:29 +00004367
4368 // Build up a list of operands to add together to form the full base.
4369 SmallVector<const SCEV *, 8> Ops;
4370
4371 // Expand the BaseRegs portion.
4372 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
4373 E = F.BaseRegs.end(); I != E; ++I) {
4374 const SCEV *Reg = *I;
4375 assert(!Reg->isZero() && "Zero allocated in a base register!");
4376
Dan Gohmand006ab92010-04-07 22:27:08 +00004377 // If we're expanding for a post-inc user, make the post-inc adjustment.
4378 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4379 Reg = TransformForPostIncUse(Denormalize, Reg,
4380 LF.UserInst, LF.OperandValToReplace,
4381 Loops, SE, DT);
Dan Gohman45774ce2010-02-12 10:34:29 +00004382
4383 Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
4384 }
4385
4386 // Expand the ScaledReg portion.
4387 Value *ICmpScaledV = 0;
Chandler Carruth6e479322013-01-07 15:04:40 +00004388 if (F.Scale != 0) {
Dan Gohman45774ce2010-02-12 10:34:29 +00004389 const SCEV *ScaledS = F.ScaledReg;
4390
Dan Gohmand006ab92010-04-07 22:27:08 +00004391 // If we're expanding for a post-inc user, make the post-inc adjustment.
4392 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4393 ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
4394 LF.UserInst, LF.OperandValToReplace,
4395 Loops, SE, DT);
Dan Gohman45774ce2010-02-12 10:34:29 +00004396
4397 if (LU.Kind == LSRUse::ICmpZero) {
4398 // An interesting way of "folding" with an icmp is to use a negated
4399 // scale, which we'll implement by inserting it into the other operand
4400 // of the icmp.
Chandler Carruth6e479322013-01-07 15:04:40 +00004401 assert(F.Scale == -1 &&
Dan Gohman45774ce2010-02-12 10:34:29 +00004402 "The only scale supported by ICmpZero uses is -1!");
4403 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
4404 } else {
4405 // Otherwise just expand the scaled register and an explicit scale,
4406 // which is expected to be matched as part of the address.
Andrew Trick8370c7c2012-06-15 20:07:29 +00004407
4408 // Flush the operand list to suppress SCEVExpander hoisting address modes.
4409 if (!Ops.empty() && LU.Kind == LSRUse::Address) {
4410 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4411 Ops.clear();
4412 Ops.push_back(SE.getUnknown(FullV));
4413 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004414 ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
4415 ScaledS = SE.getMulExpr(ScaledS,
Chandler Carruth6e479322013-01-07 15:04:40 +00004416 SE.getConstant(ScaledS->getType(), F.Scale));
Dan Gohman45774ce2010-02-12 10:34:29 +00004417 Ops.push_back(ScaledS);
4418 }
4419 }
4420
Dan Gohman29707de2010-03-03 05:29:13 +00004421 // Expand the GV portion.
Chandler Carruth6e479322013-01-07 15:04:40 +00004422 if (F.BaseGV) {
Dan Gohman29707de2010-03-03 05:29:13 +00004423 // Flush the operand list to suppress SCEVExpander hoisting.
Andrew Trick8370c7c2012-06-15 20:07:29 +00004424 if (!Ops.empty()) {
4425 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4426 Ops.clear();
4427 Ops.push_back(SE.getUnknown(FullV));
4428 }
Chandler Carruth6e479322013-01-07 15:04:40 +00004429 Ops.push_back(SE.getUnknown(F.BaseGV));
Andrew Trick8370c7c2012-06-15 20:07:29 +00004430 }
4431
4432 // Flush the operand list to suppress SCEVExpander hoisting of both folded and
4433 // unfolded offsets. LSR assumes they both live next to their uses.
4434 if (!Ops.empty()) {
Dan Gohman29707de2010-03-03 05:29:13 +00004435 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4436 Ops.clear();
4437 Ops.push_back(SE.getUnknown(FullV));
4438 }
4439
4440 // Expand the immediate portion.
Chandler Carruth6e479322013-01-07 15:04:40 +00004441 int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
Dan Gohman45774ce2010-02-12 10:34:29 +00004442 if (Offset != 0) {
4443 if (LU.Kind == LSRUse::ICmpZero) {
4444 // The other interesting way of "folding" with an ICmpZero is to use a
4445 // negated immediate.
4446 if (!ICmpScaledV)
Eli Friedmanb46345d2011-10-13 23:48:33 +00004447 ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
Dan Gohman45774ce2010-02-12 10:34:29 +00004448 else {
4449 Ops.push_back(SE.getUnknown(ICmpScaledV));
4450 ICmpScaledV = ConstantInt::get(IntTy, Offset);
4451 }
4452 } else {
4453 // Just add the immediate values. These again are expected to be matched
4454 // as part of the address.
Dan Gohman29707de2010-03-03 05:29:13 +00004455 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
Dan Gohman45774ce2010-02-12 10:34:29 +00004456 }
4457 }
4458
Dan Gohman6136e942011-05-03 00:46:49 +00004459 // Expand the unfolded offset portion.
4460 int64_t UnfoldedOffset = F.UnfoldedOffset;
4461 if (UnfoldedOffset != 0) {
4462 // Just add the immediate values.
4463 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
4464 UnfoldedOffset)));
4465 }
4466
Dan Gohman45774ce2010-02-12 10:34:29 +00004467 // Emit instructions summing all the operands.
4468 const SCEV *FullS = Ops.empty() ?
Dan Gohman1d2ded72010-05-03 22:09:21 +00004469 SE.getConstant(IntTy, 0) :
Dan Gohman45774ce2010-02-12 10:34:29 +00004470 SE.getAddExpr(Ops);
4471 Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
4472
4473 // We're done expanding now, so reset the rewriter.
Dan Gohmand006ab92010-04-07 22:27:08 +00004474 Rewriter.clearPostInc();
Dan Gohman45774ce2010-02-12 10:34:29 +00004475
4476 // An ICmpZero Formula represents an ICmp which we're handling as a
4477 // comparison against zero. Now that we've expanded an expression for that
4478 // form, update the ICmp's other operand.
4479 if (LU.Kind == LSRUse::ICmpZero) {
4480 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
4481 DeadInsts.push_back(CI->getOperand(1));
Chandler Carruth6e479322013-01-07 15:04:40 +00004482 assert(!F.BaseGV && "ICmp does not support folding a global value and "
Dan Gohman45774ce2010-02-12 10:34:29 +00004483 "a scale at the same time!");
Chandler Carruth6e479322013-01-07 15:04:40 +00004484 if (F.Scale == -1) {
Dan Gohman45774ce2010-02-12 10:34:29 +00004485 if (ICmpScaledV->getType() != OpTy) {
4486 Instruction *Cast =
4487 CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
4488 OpTy, false),
4489 ICmpScaledV, OpTy, "tmp", CI);
4490 ICmpScaledV = Cast;
4491 }
4492 CI->setOperand(1, ICmpScaledV);
4493 } else {
Chandler Carruth6e479322013-01-07 15:04:40 +00004494 assert(F.Scale == 0 &&
Dan Gohman45774ce2010-02-12 10:34:29 +00004495 "ICmp does not support folding a global value and "
4496 "a scale at the same time!");
4497 Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
4498 -(uint64_t)Offset);
4499 if (C->getType() != OpTy)
4500 C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
4501 OpTy, false),
4502 C, OpTy);
4503
4504 CI->setOperand(1, C);
4505 }
4506 }
4507
4508 return FullV;
4509}
4510
Dan Gohman6deab962010-02-16 20:25:07 +00004511/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
4512/// of their operands effectively happens in their predecessor blocks, so the
4513/// expression may need to be expanded in multiple places.
4514void LSRInstance::RewriteForPHI(PHINode *PN,
4515 const LSRFixup &LF,
4516 const Formula &F,
Dan Gohman6deab962010-02-16 20:25:07 +00004517 SCEVExpander &Rewriter,
4518 SmallVectorImpl<WeakVH> &DeadInsts,
Dan Gohman6deab962010-02-16 20:25:07 +00004519 Pass *P) const {
4520 DenseMap<BasicBlock *, Value *> Inserted;
4521 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
4522 if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
4523 BasicBlock *BB = PN->getIncomingBlock(i);
4524
4525 // If this is a critical edge, split the edge so that we do not insert
4526 // the code on all predecessor/successor paths. We do this unless this
4527 // is the canonical backedge for this loop, which complicates post-inc
4528 // users.
4529 if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
Dan Gohmande7f6992011-02-08 00:55:13 +00004530 !isa<IndirectBrInst>(BB->getTerminator())) {
Bill Wendling07efd6f2011-08-25 01:08:34 +00004531 BasicBlock *Parent = PN->getParent();
4532 Loop *PNLoop = LI.getLoopFor(Parent);
4533 if (!PNLoop || Parent != PNLoop->getHeader()) {
Dan Gohmande7f6992011-02-08 00:55:13 +00004534 // Split the critical edge.
Bill Wendling3fb137f2011-08-25 05:55:40 +00004535 BasicBlock *NewBB = 0;
4536 if (!Parent->isLandingPad()) {
Andrew Trick8de329a2011-10-04 03:50:44 +00004537 NewBB = SplitCriticalEdge(BB, Parent, P,
4538 /*MergeIdenticalEdges=*/true,
4539 /*DontDeleteUselessPhis=*/true);
Bill Wendling3fb137f2011-08-25 05:55:40 +00004540 } else {
4541 SmallVector<BasicBlock*, 2> NewBBs;
4542 SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
4543 NewBB = NewBBs[0];
4544 }
Andrew Trick402edbb2012-09-18 17:51:33 +00004545 // If NewBB==NULL, then SplitCriticalEdge refused to split because all
4546 // phi predecessors are identical. The simple thing to do is skip
4547 // splitting in this case rather than complicate the API.
4548 if (NewBB) {
4549 // If PN is outside of the loop and BB is in the loop, we want to
4550 // move the block to be immediately before the PHI block, not
4551 // immediately after BB.
4552 if (L->contains(BB) && !L->contains(PN))
4553 NewBB->moveBefore(PN->getParent());
Dan Gohman6deab962010-02-16 20:25:07 +00004554
Andrew Trick402edbb2012-09-18 17:51:33 +00004555 // Splitting the edge can reduce the number of PHI entries we have.
4556 e = PN->getNumIncomingValues();
4557 BB = NewBB;
4558 i = PN->getBasicBlockIndex(BB);
4559 }
Dan Gohmande7f6992011-02-08 00:55:13 +00004560 }
Dan Gohman6deab962010-02-16 20:25:07 +00004561 }
4562
4563 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
4564 Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
4565 if (!Pair.second)
4566 PN->setIncomingValue(i, Pair.first->second);
4567 else {
Dan Gohman8c16b382010-02-22 04:11:59 +00004568 Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
Dan Gohman6deab962010-02-16 20:25:07 +00004569
4570 // If this is reuse-by-noop-cast, insert the noop cast.
Chris Lattner229907c2011-07-18 04:54:35 +00004571 Type *OpTy = LF.OperandValToReplace->getType();
Dan Gohman6deab962010-02-16 20:25:07 +00004572 if (FullV->getType() != OpTy)
4573 FullV =
4574 CastInst::Create(CastInst::getCastOpcode(FullV, false,
4575 OpTy, false),
4576 FullV, LF.OperandValToReplace->getType(),
4577 "tmp", BB->getTerminator());
4578
4579 PN->setIncomingValue(i, FullV);
4580 Pair.first->second = FullV;
4581 }
4582 }
4583}
4584
Dan Gohman45774ce2010-02-12 10:34:29 +00004585/// Rewrite - Emit instructions for the leading candidate expression for this
4586/// LSRUse (this is called "expanding"), and update the UserInst to reference
4587/// the newly expanded value.
4588void LSRInstance::Rewrite(const LSRFixup &LF,
4589 const Formula &F,
Dan Gohman45774ce2010-02-12 10:34:29 +00004590 SCEVExpander &Rewriter,
4591 SmallVectorImpl<WeakVH> &DeadInsts,
Dan Gohman45774ce2010-02-12 10:34:29 +00004592 Pass *P) const {
Dan Gohman45774ce2010-02-12 10:34:29 +00004593 // First, find an insertion point that dominates UserInst. For PHI nodes,
4594 // find the nearest block which dominates all the relevant uses.
4595 if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
Dan Gohman8c16b382010-02-22 04:11:59 +00004596 RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
Dan Gohman45774ce2010-02-12 10:34:29 +00004597 } else {
Dan Gohman8c16b382010-02-22 04:11:59 +00004598 Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
Dan Gohman45774ce2010-02-12 10:34:29 +00004599
4600 // If this is reuse-by-noop-cast, insert the noop cast.
Chris Lattner229907c2011-07-18 04:54:35 +00004601 Type *OpTy = LF.OperandValToReplace->getType();
Dan Gohman45774ce2010-02-12 10:34:29 +00004602 if (FullV->getType() != OpTy) {
4603 Instruction *Cast =
4604 CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
4605 FullV, OpTy, "tmp", LF.UserInst);
4606 FullV = Cast;
4607 }
4608
4609 // Update the user. ICmpZero is handled specially here (for now) because
4610 // Expand may have updated one of the operands of the icmp already, and
4611 // its new value may happen to be equal to LF.OperandValToReplace, in
4612 // which case doing replaceUsesOfWith leads to replacing both operands
4613 // with the same value. TODO: Reorganize this.
4614 if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
4615 LF.UserInst->setOperand(0, FullV);
4616 else
4617 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
4618 }
4619
4620 DeadInsts.push_back(LF.OperandValToReplace);
4621}
4622
Dan Gohmana4ca28a2010-05-20 20:52:00 +00004623/// ImplementSolution - Rewrite all the fixup locations with new values,
4624/// following the chosen solution.
Dan Gohman45774ce2010-02-12 10:34:29 +00004625void
4626LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
4627 Pass *P) {
4628 // Keep track of instructions we may have made dead, so that
4629 // we can remove them after we are done working.
4630 SmallVector<WeakVH, 16> DeadInsts;
4631
Andrew Trick411daa52011-06-28 05:07:32 +00004632 SCEVExpander Rewriter(SE, "lsr");
Andrew Trick4dc3eff2012-01-09 18:58:16 +00004633#ifndef NDEBUG
4634 Rewriter.setDebugType(DEBUG_TYPE);
4635#endif
Dan Gohman45774ce2010-02-12 10:34:29 +00004636 Rewriter.disableCanonicalMode();
Andrew Trick7fb669a2011-10-07 23:46:21 +00004637 Rewriter.enableLSRMode();
Dan Gohman45774ce2010-02-12 10:34:29 +00004638 Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
4639
Andrew Trickd5d2db92012-01-10 01:45:08 +00004640 // Mark phi nodes that terminate chains so the expander tries to reuse them.
4641 for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
4642 ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
Jakob Stoklund Olesena0337d72012-04-26 23:33:09 +00004643 if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
Andrew Trickd5d2db92012-01-10 01:45:08 +00004644 Rewriter.setChainedPhi(PN);
4645 }
4646
Dan Gohman45774ce2010-02-12 10:34:29 +00004647 // Expand the new value definitions and update the users.
Dan Gohman927bcaa2010-05-20 20:33:18 +00004648 for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
4649 E = Fixups.end(); I != E; ++I) {
4650 const LSRFixup &Fixup = *I;
Dan Gohman45774ce2010-02-12 10:34:29 +00004651
Dan Gohman927bcaa2010-05-20 20:33:18 +00004652 Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
Dan Gohman45774ce2010-02-12 10:34:29 +00004653
4654 Changed = true;
4655 }
4656
Andrew Trick248d4102012-01-09 21:18:52 +00004657 for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
4658 ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
4659 GenerateIVChain(*ChainI, Rewriter, DeadInsts);
4660 Changed = true;
4661 }
Dan Gohman45774ce2010-02-12 10:34:29 +00004662 // Clean up after ourselves. This must be done before deleting any
4663 // instructions.
4664 Rewriter.clear();
4665
4666 Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
4667}
4668
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004669LSRInstance::LSRInstance(Loop *L, Pass *P)
4670 : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
Chandler Carruth73523022014-01-13 13:07:17 +00004671 DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
4672 LI(P->getAnalysis<LoopInfo>()),
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004673 TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
4674 IVIncInsertPos(0) {
Dan Gohmana83ac2d2009-11-05 21:11:53 +00004675 // If LoopSimplify form is not available, stay out of trouble.
Andrew Trick732ad802012-01-07 03:16:50 +00004676 if (!L->isLoopSimplifyForm())
4677 return;
Dan Gohmana83ac2d2009-11-05 21:11:53 +00004678
Andrew Trick070e5402012-03-16 03:16:56 +00004679 // If there's no interesting work to be done, bail early.
4680 if (IU.empty()) return;
4681
Andrew Trick19f80c12012-04-18 04:00:10 +00004682 // If there's too much analysis to be done, bail early. We won't be able to
4683 // model the problem anyway.
4684 unsigned NumUsers = 0;
4685 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
4686 if (++NumUsers > MaxIVUsers) {
4687 DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
4688 << "\n");
4689 return;
4690 }
4691 }
4692
Andrew Trick070e5402012-03-16 03:16:56 +00004693#ifndef NDEBUG
Andrew Trick12728f02012-01-17 06:45:52 +00004694 // All dominating loops must have preheaders, or SCEVExpander may not be able
4695 // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
4696 //
Andrew Trick070e5402012-03-16 03:16:56 +00004697 // IVUsers analysis should only create users that are dominated by simple loop
4698 // headers. Since this loop should dominate all of its users, its user list
4699 // should be empty if this loop itself is not within a simple loop nest.
Andrew Trick12728f02012-01-17 06:45:52 +00004700 for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
4701 Rung; Rung = Rung->getIDom()) {
4702 BasicBlock *BB = Rung->getBlock();
4703 const Loop *DomLoop = LI.getLoopFor(BB);
4704 if (DomLoop && DomLoop->getHeader() == BB) {
Andrew Trick070e5402012-03-16 03:16:56 +00004705 assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
Andrew Trick12728f02012-01-17 06:45:52 +00004706 }
Andrew Trick732ad802012-01-07 03:16:50 +00004707 }
Andrew Trick070e5402012-03-16 03:16:56 +00004708#endif // DEBUG
Dan Gohman85875f72009-03-09 20:34:59 +00004709
Dan Gohman45774ce2010-02-12 10:34:29 +00004710 DEBUG(dbgs() << "\nLSR on loop ";
Chandler Carruthd48cdbf2014-01-09 02:29:41 +00004711 L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
Dan Gohman45774ce2010-02-12 10:34:29 +00004712 dbgs() << ":\n");
Dan Gohmane201f8f2009-03-09 20:46:50 +00004713
Dan Gohman927bcaa2010-05-20 20:33:18 +00004714 // First, perform some low-level loop optimizations.
Dan Gohman45774ce2010-02-12 10:34:29 +00004715 OptimizeShadowIV();
Dan Gohman4c4043c2010-05-20 20:05:31 +00004716 OptimizeLoopTermCond();
Evan Cheng78a4eb82009-05-11 22:33:01 +00004717
Andrew Trick8acb4342011-07-21 00:40:04 +00004718 // If loop preparation eliminates all interesting IV users, bail.
4719 if (IU.empty()) return;
4720
Andrew Trick168dfff2011-09-29 01:53:08 +00004721 // Skip nested loops until we can model them better with formulae.
Andrew Trickd97b83e2012-03-22 22:42:45 +00004722 if (!L->empty()) {
Andrew Trickbc6de902011-09-29 01:33:38 +00004723 DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
Andrew Trick168dfff2011-09-29 01:53:08 +00004724 return;
Andrew Trickbc6de902011-09-29 01:33:38 +00004725 }
4726
Dan Gohman927bcaa2010-05-20 20:33:18 +00004727 // Start collecting data and preparing for the solver.
Andrew Trick29fe5f02012-01-09 19:50:34 +00004728 CollectChains();
Dan Gohman45774ce2010-02-12 10:34:29 +00004729 CollectInterestingTypesAndFactors();
4730 CollectFixupsAndInitialFormulae();
4731 CollectLoopInvariantFixupsAndFormulae();
Chris Lattner9bfa6f82005-08-08 05:28:22 +00004732
Andrew Trick248d4102012-01-09 21:18:52 +00004733 assert(!Uses.empty() && "IVUsers reported at least one use");
Dan Gohman45774ce2010-02-12 10:34:29 +00004734 DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
4735 print_uses(dbgs()));
Misha Brukmanb1c93172005-04-21 23:48:37 +00004736
Dan Gohman45774ce2010-02-12 10:34:29 +00004737 // Now use the reuse data to generate a bunch of interesting ways
4738 // to formulate the values needed for the uses.
4739 GenerateAllReuseFormulae();
Evan Cheng3df447d2006-03-16 21:53:05 +00004740
Dan Gohman45774ce2010-02-12 10:34:29 +00004741 FilterOutUndesirableDedicatedRegisters();
4742 NarrowSearchSpaceUsingHeuristics();
Dan Gohman92c36962009-12-18 00:06:20 +00004743
Dan Gohman45774ce2010-02-12 10:34:29 +00004744 SmallVector<const Formula *, 8> Solution;
4745 Solve(Solution);
Dan Gohman92c36962009-12-18 00:06:20 +00004746
Dan Gohman45774ce2010-02-12 10:34:29 +00004747 // Release memory that is no longer needed.
4748 Factors.clear();
4749 Types.clear();
4750 RegUses.clear();
4751
Andrew Trick58124392011-09-27 00:44:14 +00004752 if (Solution.empty())
4753 return;
4754
Dan Gohman45774ce2010-02-12 10:34:29 +00004755#ifndef NDEBUG
4756 // Formulae should be legal.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004757 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
4758 I != E; ++I) {
4759 const LSRUse &LU = *I;
4760 for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
4761 JE = LU.Formulae.end();
4762 J != JE; ++J)
4763 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4764 *J) && "Illegal formula generated!");
Dan Gohman45774ce2010-02-12 10:34:29 +00004765 };
4766#endif
4767
4768 // Now that we've decided what we want, make it so.
4769 ImplementSolution(Solution, P);
4770}
4771
4772void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
4773 if (Factors.empty() && Types.empty()) return;
4774
4775 OS << "LSR has identified the following interesting factors and types: ";
4776 bool First = true;
4777
4778 for (SmallSetVector<int64_t, 8>::const_iterator
4779 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
4780 if (!First) OS << ", ";
4781 First = false;
4782 OS << '*' << *I;
Evan Cheng87fe40b2009-11-10 21:14:05 +00004783 }
Dale Johannesen02cb2bf2009-05-11 17:15:42 +00004784
Chris Lattner229907c2011-07-18 04:54:35 +00004785 for (SmallSetVector<Type *, 4>::const_iterator
Dan Gohman45774ce2010-02-12 10:34:29 +00004786 I = Types.begin(), E = Types.end(); I != E; ++I) {
4787 if (!First) OS << ", ";
4788 First = false;
4789 OS << '(' << **I << ')';
4790 }
4791 OS << '\n';
4792}
4793
4794void LSRInstance::print_fixups(raw_ostream &OS) const {
4795 OS << "LSR is examining the following fixup sites:\n";
4796 for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
4797 E = Fixups.end(); I != E; ++I) {
Dan Gohman45774ce2010-02-12 10:34:29 +00004798 dbgs() << " ";
Dan Gohman86110fa2010-05-20 22:25:20 +00004799 I->print(OS);
Dan Gohman45774ce2010-02-12 10:34:29 +00004800 OS << '\n';
4801 }
4802}
4803
4804void LSRInstance::print_uses(raw_ostream &OS) const {
4805 OS << "LSR is examining the following uses:\n";
4806 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
4807 E = Uses.end(); I != E; ++I) {
4808 const LSRUse &LU = *I;
4809 dbgs() << " ";
4810 LU.print(OS);
4811 OS << '\n';
4812 for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
4813 JE = LU.Formulae.end(); J != JE; ++J) {
4814 OS << " ";
4815 J->print(OS);
4816 OS << '\n';
4817 }
4818 }
4819}
4820
4821void LSRInstance::print(raw_ostream &OS) const {
4822 print_factors_and_types(OS);
4823 print_fixups(OS);
4824 print_uses(OS);
4825}
4826
Manman Ren49d684e2012-09-12 05:06:18 +00004827#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Dan Gohman45774ce2010-02-12 10:34:29 +00004828void LSRInstance::dump() const {
4829 print(errs()); errs() << '\n';
4830}
Manman Renc3366cc2012-09-06 19:55:56 +00004831#endif
Dan Gohman45774ce2010-02-12 10:34:29 +00004832
4833namespace {
4834
4835class LoopStrengthReduce : public LoopPass {
Dan Gohman45774ce2010-02-12 10:34:29 +00004836public:
4837 static char ID; // Pass ID, replacement for typeid
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004838 LoopStrengthReduce();
Dan Gohman45774ce2010-02-12 10:34:29 +00004839
4840private:
Craig Topper3e4c6972014-03-05 09:10:37 +00004841 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
4842 void getAnalysisUsage(AnalysisUsage &AU) const override;
Dan Gohman45774ce2010-02-12 10:34:29 +00004843};
4844
4845}
4846
4847char LoopStrengthReduce::ID = 0;
Owen Anderson8ac477f2010-10-12 19:48:12 +00004848INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
Owen Andersondf7a4f22010-10-07 22:25:06 +00004849 "Loop Strength Reduction", false, false)
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004850INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
Chandler Carruth73523022014-01-13 13:07:17 +00004851INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
Owen Anderson8ac477f2010-10-12 19:48:12 +00004852INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
4853INITIALIZE_PASS_DEPENDENCY(IVUsers)
Owen Andersona4fefc12010-10-19 20:08:44 +00004854INITIALIZE_PASS_DEPENDENCY(LoopInfo)
4855INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
Owen Anderson8ac477f2010-10-12 19:48:12 +00004856INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
4857 "Loop Strength Reduction", false, false)
4858
Nadav Rotem4dc976f2012-10-19 21:28:43 +00004859
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004860Pass *llvm::createLoopStrengthReducePass() {
4861 return new LoopStrengthReduce();
Dan Gohman45774ce2010-02-12 10:34:29 +00004862}
4863
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004864LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
4865 initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
4866}
Dan Gohman45774ce2010-02-12 10:34:29 +00004867
4868void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
4869 // We split critical edges, so we change the CFG. However, we do update
4870 // many analyses if they are around.
Eric Christopherda6bd452011-02-10 01:48:24 +00004871 AU.addPreservedID(LoopSimplifyID);
Dan Gohman45774ce2010-02-12 10:34:29 +00004872
Eric Christopherda6bd452011-02-10 01:48:24 +00004873 AU.addRequired<LoopInfo>();
4874 AU.addPreserved<LoopInfo>();
4875 AU.addRequiredID(LoopSimplifyID);
Chandler Carruth73523022014-01-13 13:07:17 +00004876 AU.addRequired<DominatorTreeWrapperPass>();
4877 AU.addPreserved<DominatorTreeWrapperPass>();
Dan Gohman45774ce2010-02-12 10:34:29 +00004878 AU.addRequired<ScalarEvolution>();
4879 AU.addPreserved<ScalarEvolution>();
Cameron Zwarich97dae4d2011-02-10 23:53:14 +00004880 // Requiring LoopSimplify a second time here prevents IVUsers from running
4881 // twice, since LoopSimplify was invalidated by running ScalarEvolution.
4882 AU.addRequiredID(LoopSimplifyID);
Dan Gohman45774ce2010-02-12 10:34:29 +00004883 AU.addRequired<IVUsers>();
4884 AU.addPreserved<IVUsers>();
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004885 AU.addRequired<TargetTransformInfo>();
Dan Gohman45774ce2010-02-12 10:34:29 +00004886}
4887
4888bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Paul Robinsonaf4e64d2014-02-06 00:07:05 +00004889 if (skipOptnoneFunction(L))
4890 return false;
4891
Dan Gohman45774ce2010-02-12 10:34:29 +00004892 bool Changed = false;
4893
4894 // Run the main LSR transformation.
Chandler Carruth26c59fa2013-01-07 14:41:08 +00004895 Changed |= LSRInstance(L, this).getChanged();
Dan Gohman45774ce2010-02-12 10:34:29 +00004896
Andrew Trick2ec61a82012-01-07 01:36:44 +00004897 // Remove any extra phis created by processing inner loops.
Dan Gohmanb5358002010-01-05 16:31:45 +00004898 Changed |= DeleteDeadPHIs(L->getHeader());
Andrew Trickf950ce82013-01-06 05:59:39 +00004899 if (EnablePhiElim && L->isLoopSimplifyForm()) {
Andrew Trick2ec61a82012-01-07 01:36:44 +00004900 SmallVector<WeakVH, 16> DeadInsts;
4901 SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
4902#ifndef NDEBUG
4903 Rewriter.setDebugType(DEBUG_TYPE);
4904#endif
Chandler Carruth73523022014-01-13 13:07:17 +00004905 unsigned numFolded = Rewriter.replaceCongruentIVs(
4906 L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
4907 &getAnalysis<TargetTransformInfo>());
Andrew Trick2ec61a82012-01-07 01:36:44 +00004908 if (numFolded) {
4909 Changed = true;
4910 DeleteTriviallyDeadInstructions(DeadInsts);
4911 DeleteDeadPHIs(L->getHeader());
4912 }
4913 }
Evan Cheng03001cb2008-07-07 19:51:32 +00004914 return Changed;
Nate Begemanb18121e2004-10-18 21:08:22 +00004915}