//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable; it
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>; however,
// it's useful to think about these as the same register, with some uses using
// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated.
//
// TODO: Handle multiple loops at a time.
//
// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is
//       that {0,+,1}<%bb> is getting picked first because all 7 uses can
//       use it, and while it's a pretty good solution, it means that LSR
//       doesn't look further to find an even better solution.
//
// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
//       instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
//       smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
//       multiple base registers, or as the increment expression in an addrec),
//       we may not actually need both reg and (-1 * reg) in registers; the
//       negation can be implemented by using a sub instead of an add. The
//       lack of support for taking this into consideration when making
//       register pressure decisions is partly worked around by the "Special"
//       use kind.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

namespace {

// Constant strides come first, and are in turn sorted by their absolute
// values. If absolute values are the same, then positive strides come first;
// e.g.
//   4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
  const ScalarEvolution &SE;
  explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}

  bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
    const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
    const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
    if (LHSC && RHSC) {
      unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
                                   SE.getTypeSizeInBits(RHS->getType()));
      APInt LV = LHSC->getValue()->getValue();
      APInt RV = RHSC->getValue()->getValue();
      LV.sextOrTrunc(BitWidth);
      RV.sextOrTrunc(BitWidth);
      APInt ALV = LV.abs();
      APInt ARV = RV.abs();
      if (ALV == ARV) {
        if (LV != RV)
          return LV.sgt(RV);
      } else {
        return ALV.ult(ARV);
      }

      // If it's the same value but a different type, sort by bit width so
      // that we emit larger induction variables before smaller
      // ones, letting the smaller be re-written in terms of larger ones.
      return SE.getTypeSizeInBits(RHS->getType()) <
             SE.getTypeSizeInBits(LHS->getType());
    }
    return LHSC && !RHSC;
  }
};

/// RegSortData - This class holds data which is used to order reuse
/// candidates.
class RegSortData {
public:
  /// Bits - This represents the set of LSRUses (by index) which reference a
  /// particular register.
  SmallBitVector Bits;

  /// MaxComplexity - This represents the greatest complexity (see the comments
  /// on Formula::getComplexity) seen with a particular register.
  uint32_t MaxComplexity;

  /// Index - This holds an arbitrary value used as a last-resort tie breaker
  /// to ensure deterministic behavior.
  unsigned Index;

  RegSortData() {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << Bits.count()
     << ", MaxComplexity=";
  OS.write_hex(MaxComplexity);
  OS << ", Index=" << Index << ']';
}

void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// RegCount - This is a helper class to sort a given set of registers
/// according to associated RegSortData values.
class RegCount {
public:
  const SCEV *Reg;
  RegSortData Sort;

  RegCount(const SCEV *R, const RegSortData &RSD)
    : Reg(R), Sort(RSD) {}

  // Sort by count. Returning true means the register is preferred.
  bool operator<(const RegCount &Other) const {
    // Sort by the number of unique uses of this register.
    unsigned A = Sort.Bits.count();
    unsigned B = Other.Sort.Bits.count();
    if (A != B) return A > B;

    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
      // AddRecs have higher priority than other things.
      if (!BR) return true;

      // Prefer affine values.
      if (AR->isAffine() != BR->isAffine())
        return AR->isAffine();

      const Loop *AL = AR->getLoop();
      const Loop *BL = BR->getLoop();
      if (AL != BL) {
        unsigned ADepth = AL->getLoopDepth();
        unsigned BDepth = BL->getLoopDepth();
        // Prefer a less deeply nested addrec.
        if (ADepth != BDepth)
          return ADepth < BDepth;

        // Different loops at the same depth; do something arbitrary.
        BasicBlock *AH = AL->getHeader();
        BasicBlock *BH = BL->getHeader();
        for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
          if (&*I == BH) return true;
        return false;
      }

      // Sort addrecs by stride.
      const SCEV *AStep = AR->getOperand(1);
      const SCEV *BStep = BR->getOperand(1);
      if (AStep != BStep) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep);
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Ignore the sign bit, assuming that striding by a negative value
          // is just as easy as by a positive value.
          // Prefer the addrec with the lesser absolute value stride, as it
          // will allow uses to have simpler addressing modes.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
      }

      // Then sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;

      // Then sort them by their start values.
      const SCEV *AStart = AR->getStart();
      const SCEV *BStart = BR->getStart();
      if (AStart != BStart) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart);
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Prefer positive over negative if the absolute values are the same.
          if (AC->getValue()->getValue().abs() ==
              BC->getValue()->getValue().abs())
            return AC->getValue()->getValue().isStrictlyPositive();
          // Prefer the addrec with the lesser absolute value start.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
      }
    } else {
      // AddRecs have higher priority than other things.
      if (isa<SCEVAddRecExpr>(Other.Reg)) return false;
      // Sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;
    }

    // Tie-breaker: the arbitrary index, to ensure a reliable ordering.
    return Sort.Index < Other.Sort.Index;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

void RegCount::print(raw_ostream &OS) const {
  OS << *Reg << ':';
  Sort.print(OS);
}

void RegCount::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// Formula - This class holds information that describes a formula for
/// satisfying a use. It may include broken-out immediates and scaled registers.
struct Formula {
  /// AM - This is used to represent complex addressing, as well as other kinds
  /// of interesting uses.
  TargetLowering::AddrMode AM;

  /// BaseRegs - The list of "base" registers for this use. When this is
  /// non-empty, AM.HasBaseReg should be set to true.
  SmallVector<const SCEV *, 2> BaseRegs;

  /// ScaledReg - The 'scaled' register for this use. This should be non-null
  /// when AM.Scale is not zero.
  const SCEV *ScaledReg;

  Formula() : ScaledReg(0) {}

  unsigned getNumRegs() const;
  uint32_t getComplexity() const;
  const Type *getType() const;

  void InitialMatch(const SCEV *S, Loop *L,
                    ScalarEvolution &SE, DominatorTree &DT);

  /// referencesReg - Test if this formula references the given register.
  bool referencesReg(const SCEV *S) const {
    return S == ScaledReg ||
           std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
  }

  bool operator==(const Formula &Other) const {
    return BaseRegs == Other.BaseRegs &&
           ScaledReg == Other.ScaledReg &&
           AM.Scale == Other.AM.Scale &&
           AM.BaseOffs == Other.AM.BaseOffs &&
           AM.BaseGV == Other.AM.BaseGV;
  }

  // This sorts at least partially based on host pointer values, which are
  // not deterministic, so it is only usable for uniquification.
  bool operator<(const Formula &Other) const {
    if (BaseRegs != Other.BaseRegs)
      return BaseRegs < Other.BaseRegs;
    if (ScaledReg != Other.ScaledReg)
      return ScaledReg < Other.ScaledReg;
    if (AM.Scale != Other.AM.Scale)
      return AM.Scale < Other.AM.Scale;
    if (AM.BaseOffs != Other.AM.BaseOffs)
      return AM.BaseOffs < Other.AM.BaseOffs;
    if (AM.BaseGV != Other.AM.BaseGV)
      return AM.BaseGV < Other.AM.BaseGV;
    return false;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
unsigned Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// getComplexity - Return an oversimplified value indicating the complexity
/// of this formula. This is used as a tie-breaker in choosing register
/// preferences.
uint32_t Formula::getComplexity() const {
  // Encode the information in a uint32_t so that comparing with operator<
  // will be interesting.
  return
    // Most significant, the number of registers. This saturates because we
    // need the bits, and because beyond a few hundred it doesn't really matter.
    (std::min(getNumRegs(), (1u<<15)-1) << 17) |
    // Having multiple base regs is worse than having a base reg and a scale.
    ((BaseRegs.size() > 1) << 16) |
    // Scale absolute value.
    ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) |
    // Scale sign, which is less significant than the absolute value.
    ((AM.Scale < 0) << 8) |
    // Offset absolute value.
    ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) |
    // If a GV is present, treat it like a maximal offset.
    ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) |
    // Offset sign, which is less significant than the absolute offset.
    ((AM.BaseOffs < 0) << 0);
}
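
// A worked example (illustrative only, not from the original source): for a
// hypothetical formula "reg(%a) + 2*reg(%b) + 4", getNumRegs() is 2 (one base
// register plus one scaled register), so with the encoding above:
//
//   (2 << 17)                 // two registers                    = 262144
//   | (0 << 16)               // only one base reg
//   | ((Log2_64(2)+1) << 9)   // scale 2 -> field value 2         = 1024
//   | (0 << 8)                // scale is positive
//   | ((Log2_64(4)+1) << 1)   // offset 4 -> field value 3        = 6
//   | (0 << 1)                // no BaseGV
//   | (0 << 0)                // offset is non-negative
//   = 263174
//
// Larger values compare as more complex, so this formula would lose a
// tie-break against any formula needing fewer registers.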

/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
const Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         AM.BaseGV ? AM.BaseGV->getType() :
         0;
}

namespace {

/// ComplexitySorter - A predicate which orders Formulae by the number of
/// registers they contain.
struct ComplexitySorter {
  bool operator()(const Formula &LHS, const Formula &RHS) const {
    unsigned L = LHS.getNumRegs();
    unsigned R = RHS.getNumRegs();
    if (L != R) return L < R;

    return LHS.getComplexity() < RHS.getComplexity();
  }
};

}

/// DoInitialMatch - Recursion helper for InitialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE, DominatorTree &DT) {
  // Collect expressions which properly dominate the loop header.
  if (S->properlyDominates(L->getHeader(), &DT)) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I)
      DoInitialMatch(*I, L, Good, Bad, SE, DT);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (!AR->getStart()->isZero()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
      DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
                                      AR->getStepRecurrence(SE),
                                      AR->getLoop()),
                     L, Good, Bad, SE, DT);
      return;
    }
  }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
           E = MyGood.end(); I != E; ++I)
        Good.push_back(SE.getMulExpr(NegOne, *I));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
           E = MyBad.end(); I != E; ++I)
        Bad.push_back(SE.getMulExpr(NegOne, *I));
      return;
    }

  // OK, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
void Formula::InitialMatch(const SCEV *S, Loop *L,
                           ScalarEvolution &SE, DominatorTree &DT) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE, DT);
  if (!Good.empty()) {
    BaseRegs.push_back(SE.getAddExpr(Good));
    AM.HasBaseReg = true;
  }
  if (!Bad.empty()) {
    BaseRegs.push_back(SE.getAddExpr(Bad));
    AM.HasBaseReg = true;
  }
}
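
// An illustrative walk-through (not from the original source): given a use
// whose SCEV is (%base + {0,+,4}<%L>), where %base is loop-invariant,
// DoInitialMatch sends %base to Good (it properly dominates the loop header)
// and the zero-start addrec to Bad. InitialMatch then produces a formula with
// two base registers, roughly:
//
//   reg(%base) + reg({0,+,4}<%L>)
//
// keeping the loop-invariant part in its own register so later reuse
// heuristics can share it across uses.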

void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (AM.BaseGV) {
    if (!First) OS << " + "; else First = false;
    WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
  }
  if (AM.BaseOffs != 0) {
    if (!First) OS << " + "; else First = false;
    OS << AM.BaseOffs;
  }
  for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
       E = BaseRegs.end(); I != E; ++I) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(";
    OS << **I;
    OS << ")";
  }
  if (AM.Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << AM.Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ")";
  }
}

void Formula::dump() const {
  print(errs()); errs() << '\n';
}

/// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
/// or null otherwise. If IgnoreSignificantBits is true, expressions like
/// (X * Y) /s Y are simplified to X, ignoring that the multiplication may
/// overflow, which is useful when the result will be used in a context where
/// the most significant bits are ignored.
static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
                           ScalarEvolution &SE,
                           bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getIntegerSCEV(1, LHS->getType());

  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
  // folding.
  if (RHS->isAllOnesValue())
    return SE.getMulExpr(LHS, RHS);

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
    if (!RC)
      return 0;
    if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
      return 0;
    return SE.getConstant(C->getValue()->getValue()
               .sdiv(RC->getValue()->getValue()));
  }

  // Distribute the sdiv over addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
                                IgnoreSignificantBits);
    if (!Start) return 0;
    const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
                               IgnoreSignificantBits);
    if (!Step) return 0;
    return SE.getAddRecExpr(Start, Step, AR->getLoop());
  }

  // Distribute the sdiv over add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    SmallVector<const SCEV *, 8> Ops;
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I) {
      const SCEV *Op = getSDiv(*I, RHS, SE,
                               IgnoreSignificantBits);
      if (!Op) return 0;
      Ops.push_back(Op);
    }
    return SE.getAddExpr(Ops);
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
    if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
           I != E; ++I) {
        if (!Found)
          if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) {
            Ops.push_back(Q);
            Found = true;
            continue;
          }
        Ops.push_back(*I);
      }
      return Found ? SE.getMulExpr(Ops) : 0;
    }

  // Otherwise we don't know.
  return 0;
}
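
// Illustrative examples (not from the original source) of what getSDiv can
// simplify, in SCEV notation:
//
//   getSDiv({8,+,4}<%L>, 4)  ==>  {2,+,1}<%L>   // distributes over the addrec
//   getSDiv(6 + 3 * %x, 3)   ==>  2 + %x        // constant and mul operands
//   getSDiv(%x, %y)          ==>  null          // nothing provable
//
// The (3 * %x) /s 3 step is only taken when the multiply has no signed wrap
// or IgnoreSignificantBits is set, per the guard above; otherwise the whole
// query returns null.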

namespace {

/// LSRUse - This class holds the state that LSR keeps for each use in
/// IVUsers, as well as uses invented by LSR itself. It includes information
/// about what kinds of things can be folded into the user, information
/// about the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
  SmallSet<Formula, 8> FormulaeUniquifier;

public:
  /// KindType - An enum for a kind of use, indicating what types of
  /// scaled and immediate operands it might support.
  enum KindType {
    Basic,   ///< A normal use, with no folding.
    Special, ///< A special case of basic, allowing -1 scales.
    Address, ///< An address use; folding according to TargetLowering.
    ICmpZero ///< An equality icmp with both operands folded into one.
    // TODO: Add a generic icmp too?
  };

  KindType Kind;
  const Type *AccessTy;
  Instruction *UserInst;
  Value *OperandValToReplace;

  /// PostIncLoop - If this user is to use the post-incremented value of an
  /// induction variable, this variable is non-null and holds the loop
  /// associated with the induction variable.
  const Loop *PostIncLoop;

  /// Formulae - A list of ways to build a value that can satisfy this user.
  /// After the list is populated, one of these is selected heuristically and
  /// used to formulate a replacement for OperandValToReplace in UserInst.
  SmallVector<Formula, 12> Formulae;

  LSRUse() : Kind(Basic), AccessTy(0),
             UserInst(0), OperandValToReplace(0), PostIncLoop(0) {}

  void InsertInitialFormula(const SCEV *S, Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT);
  void InsertSupplementalFormula(const SCEV *S);

  bool InsertFormula(const Formula &F);

  void Rewrite(Loop *L, SCEVExpander &Rewriter,
               SmallVectorImpl<WeakVH> &DeadInsts,
               ScalarEvolution &SE, DominatorTree &DT,
               Pass *P) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  Value *Expand(BasicBlock::iterator IP,
                Loop *L, SCEVExpander &Rewriter,
                SmallVectorImpl<WeakVH> &DeadInsts,
                ScalarEvolution &SE, DominatorTree &DT) const;
};

}

/// ExtractImmediate - If S involves the addition of a constant integer value,
/// return that integer value, and mutate S to point to a new SCEV with that
/// value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    if (C->getValue()->getValue().getMinSignedBits() <= 64) {
      S = SE.getIntegerSCEV(0, C->getType());
      return C->getValue()->getSExtValue();
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    S = SE.getAddRecExpr(NewOps, AR->getLoop());
    return Result;
  }
  return 0;
}
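
// For example (illustrative only): given S = (42 + %a), ExtractImmediate
// returns 42 and mutates S to %a; given S = {40 + %a,+,4}<%L>, it recurses
// into the addrec's start, returning 40 and leaving S as {%a,+,4}<%L>.
// SCEV canonicalization keeps constants at the front of an add expression,
// which is why only NewOps.front() needs to be examined.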

/// ExtractSymbol - If S involves the addition of a GlobalValue address,
/// return that symbol, and mutate S to point to a new SCEV with that
/// value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
      S = SE.getIntegerSCEV(0, GV->getType());
      return GV;
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
    S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
    S = SE.getAddRecExpr(NewOps, AR->getLoop());
    return Result;
  }
  return 0;
}
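
// For example (illustrative only): given S = (%a + @g), ExtractSymbol returns
// @g and mutates S to %a. Note the asymmetry with ExtractImmediate: SCEV
// canonicalization sorts SCEVUnknowns such as global addresses toward the end
// of an add expression, so here it is NewOps.back() that is examined.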

/// isLegalUse - Test whether the use described by AM is "legal", meaning
/// it can be completely folded into the user instruction at isel time.
/// This includes address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetLowering::AddrMode &AM,
                       LSRUse::KindType Kind, const Type *AccessTy,
                       const TargetLowering *TLI) {
  switch (Kind) {
  case LSRUse::Address:
    // If we have low-level target information, ask the target if it can
    // completely fold this address.
    if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);

    // Otherwise, just guess that reg+reg addressing is legal.
    return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;

  case LSRUse::ICmpZero:
    // There's not even a target hook for querying whether it would be legal
    // to fold a GV into an ICmp.
    if (AM.BaseGV)
      return false;

    // ICmp only has two operands; don't allow more than two non-trivial parts.
    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
      return false;

    // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale
    // by putting the scaled register in the other operand of the icmp.
    if (AM.Scale != 0 && AM.Scale != -1)
      return false;

    // If we have low-level target information, ask the target if it can
    // fold an integer immediate on an icmp.
    if (AM.BaseOffs != 0) {
      if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
      return false;
    }

    return true;

  case LSRUse::Basic:
    // Only handle single-register values.
    return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;

  case LSRUse::Special:
    // Only handle -1 scales, or no scale.
    return AM.Scale == 0 || AM.Scale == -1;
  }

  return false;
}
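
// As a rough illustration (target-dependent, so only a sketch): with an x86
// TargetLowering, an Address-kind use described by
//
//   AM.BaseGV = 0; AM.BaseOffs = 42; AM.HasBaseReg = true; AM.Scale = 4;
//
// would typically be accepted, since it maps onto the [base + 4*index + 42]
// addressing form; the same AM with Scale = 3 would be rejected, because x86
// only scales an index register by 1, 2, 4, or 8.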

static bool isAlwaysFoldable(const SCEV *S,
                             bool HasBaseReg,
                             LSRUse::KindType Kind, const Type *AccessTy,
                             const TargetLowering *TLI,
                             ScalarEvolution &SE) {
  // Fast-path: zero is always foldable.
  if (S->isZero()) return true;

  // Conservatively, create an address with an immediate, a base,
  // and a scale.
  TargetLowering::AddrMode AM;
  AM.BaseOffs = ExtractImmediate(S, SE);
  AM.BaseGV = ExtractSymbol(S, SE);
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  // If there's anything else involved, it's not foldable.
  if (!S->isZero()) return false;

  return isLegalUse(AM, Kind, AccessTy, TLI);
}

/// InsertFormula - If the given formula has not yet been inserted, add it
/// to the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
  Formula Copy = F;

  // Sort the base regs, to avoid adding the same solution twice with
  // the base regs in different orders. This uses host pointer values, but
  // it doesn't matter since it's only used for uniquifying.
  std::sort(Copy.BaseRegs.begin(), Copy.BaseRegs.end());

  DEBUG(for (SmallVectorImpl<const SCEV *>::const_iterator I =
        F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
          assert(!(*I)->isZero() && "Zero allocated in a base register!");
        assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
               "Zero allocated in a scaled register!"));

  if (FormulaeUniquifier.insert(Copy)) {
    Formulae.push_back(F);
    return true;
  }

  return false;
}

void
LSRUse::InsertInitialFormula(const SCEV *S, Loop *L,
                             ScalarEvolution &SE, DominatorTree &DT) {
  Formula F;
  F.InitialMatch(S, L, SE, DT);
  bool Inserted = InsertFormula(F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

void
LSRUse::InsertSupplementalFormula(const SCEV *S) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.AM.HasBaseReg = true;
  bool Inserted = InsertFormula(F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// getImmediateDominator - A handy utility for the specific DominatorTree
/// query that we need here.
///
static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
  DomTreeNode *Node = DT.getNode(BB);
  if (!Node) return 0;
  Node = Node->getIDom();
  if (!Node) return 0;
  return Node->getBlock();
}

Value *LSRUse::Expand(BasicBlock::iterator IP,
                      Loop *L, SCEVExpander &Rewriter,
                      SmallVectorImpl<WeakVH> &DeadInsts,
                      ScalarEvolution &SE, DominatorTree &DT) const {
  // First, collect the instructions that the expansion must remain dominated
  // by; any operands that will be required in the expansion must dominate
  // the insertion point.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(OperandValToReplace))
    Inputs.push_back(I);
  if (Kind == ICmpZero)
    if (Instruction *I =
          dyn_cast<Instruction>(cast<ICmpInst>(UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (PostIncLoop && !L->contains(UserInst))
    Inputs.push_back(L->getLoopLatch()->getTerminator());

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  for (;;) {
    bool AllDominate = true;
    Instruction *BetterPos = 0;
    BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
    if (!IDom) break;
    Instruction *Tentative = IDom->getTerminator();
    for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
         E = Inputs.end(); I != E; ++I) {
      Instruction *Inst = *I;
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      if (IDom == Inst->getParent() &&
          (!BetterPos || DT.dominates(BetterPos, Inst)))
        BetterPos = next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos;
    else
      IP = Tentative;
  }
  while (isa<PHINode>(IP)) ++IP;

  // The first formula in the list is the winner.
  const Formula &F = Formulae.front();

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
  Rewriter.setPostInc(PostIncLoop);

  // This is the type that the user actually needs.
  const Type *OpTy = OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  const Type *Ty = F.getType();
  if (!Ty)
    // No type known; just expand directly to the ultimate type.
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;
  // This is the type to do integer arithmetic in.
  const Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;

  // Expand the BaseRegs portion.
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg))
      if (AR->getLoop() == PostIncLoop)
        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));

    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = 0;
  if (F.AM.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
      if (AR->getLoop() == PostIncLoop)
        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));

    if (Kind == ICmpZero) {
      // An interesting way of "folding" with an icmp is to use a negated
      // scale, which we'll implement by inserting it into the other operand
      // of the icmp.
      assert(F.AM.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
      const Type *ScaledTy = SE.getEffectiveSCEVType(ScaledS->getType());
      ScaledS = SE.getMulExpr(ScaledS,
                              SE.getSCEV(ConstantInt::get(ScaledTy,
                                                          F.AM.Scale)));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the immediate portions.
  if (F.AM.BaseGV)
    Ops.push_back(SE.getSCEV(F.AM.BaseGV));
  if (F.AM.BaseOffs != 0) {
    if (Kind == ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
      if (!ICmpScaledV)
        ICmpScaledV = ConstantInt::get(IntTy, -F.AM.BaseOffs);
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, F.AM.BaseOffs);
      }
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
      Ops.push_back(SE.getSCEV(ConstantInt::get(IntTy, F.AM.BaseOffs)));
    }
  }

  // Emit instructions summing all the operands.
  const SCEV *FullS = Ops.empty() ?
                      SE.getIntegerSCEV(0, IntTy) :
                      SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);

  // We're done expanding now, so reset the rewriter.
  Rewriter.setPostInc(0);

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
  // form, update the ICmp's other operand.
  if (Kind == ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(UserInst);
    DeadInsts.push_back(CI->getOperand(1));
    assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
                           "a scale at the same time!");
    if (F.AM.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
                                                   OpTy, false),
                           ICmpScaledV, OpTy, "tmp", CI);
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      assert(F.AM.Scale == 0 &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)F.AM.BaseOffs);
      if (C->getType() != OpTy)
        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                          OpTy, false),
                                  C, OpTy);

      CI->setOperand(1, C);
    }
  }

  return FullV;
}

/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRUse::Rewrite(Loop *L, SCEVExpander &Rewriter,
                     SmallVectorImpl<WeakVH> &DeadInsts,
                     ScalarEvolution &SE, DominatorTree &DT,
                     Pass *P) const {
  const Type *OpTy = OperandValToReplace->getType();

  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    DenseMap<BasicBlock *, Value *> Inserted;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace) {
        BasicBlock *BB = PN->getIncomingBlock(i);

        // If this is a critical edge, split the edge so that we do not insert
        // the code on all predecessor/successor paths. We do this unless this
        // is the canonical backedge for this loop, which complicates post-inc
        // users.
        if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
            !isa<IndirectBrInst>(BB->getTerminator()) &&
            (PN->getParent() != L->getHeader() || !L->contains(BB))) {
          // Split the critical edge.
          BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);

          // If PN is outside of the loop and BB is in the loop, we want to
          // move the block to be immediately before the PHI block, not
          // immediately after BB.
          if (L->contains(BB) && !L->contains(PN))
            NewBB->moveBefore(PN->getParent());

          // Splitting the edge can reduce the number of PHI entries we have.
          e = PN->getNumIncomingValues();
          BB = NewBB;
          i = PN->getBasicBlockIndex(BB);
        }

        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
        if (!Pair.second)
          PN->setIncomingValue(i, Pair.first->second);
        else {
          Value *FullV = Expand(BB->getTerminator(),
                                L, Rewriter, DeadInsts, SE, DT);

          // If this is reuse-by-noop-cast, insert the noop cast.
          if (FullV->getType() != OpTy)
            FullV =
              CastInst::Create(CastInst::getCastOpcode(FullV, false,
                                                       OpTy, false),
                               FullV, OperandValToReplace->getType(),
                               "tmp", BB->getTerminator());

          PN->setIncomingValue(i, FullV);
          Pair.first->second = FullV;
        }
      }
  } else {
    Value *FullV = Expand(UserInst, L, Rewriter, DeadInsts, SE, DT);

    // If this is reuse-by-noop-cast, insert the noop cast.
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", UserInst);
      FullV = Cast;
    }

    // Update the user.
    UserInst->replaceUsesOfWith(OperandValToReplace, FullV);
  }

  DeadInsts.push_back(OperandValToReplace);
}

void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  switch (Kind) {
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
  case Address:
    OS << "Address of ";
    if (isa<PointerType>(AccessTy))
      OS << "pointer"; // the full pointer type could be really verbose
    else
      OS << *AccessTy;
  }

  OS << ", UserInst=";
  // Store is common and interesting enough to be worth special-casing.
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    OS << "store ";
    WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
  else
    WriteAsOperand(OS, UserInst, /*PrintType=*/false);

  OS << ", OperandValToReplace=";
  WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);

  if (PostIncLoop) {
    OS << ", PostIncLoop=";
    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
  }
}

void LSRUse::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// Score - This class is used to measure and compare candidate formulae.
class Score {
  unsigned NumRegs;
  unsigned NumPhis;
  unsigned NumIVMuls;
  unsigned NumBaseAdds;
  unsigned NumImms;

public:
  Score()
    : NumRegs(0), NumPhis(0), NumIVMuls(0), NumBaseAdds(0), NumImms(0) {}

  void RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                   ScalarEvolution &SE);

  void Rate(const SCEV *Reg, const SmallBitVector &Bits,
            const SmallVector<LSRUse, 16> &Uses, const Loop *L,
            ScalarEvolution &SE);

  unsigned getNumRegs() const { return NumRegs; }

  bool operator<(const Score &Other) const;

  void print_details(raw_ostream &OS, const SCEV *Reg,
                     const SmallPtrSet<const SCEV *, 8> &Regs) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  void RateRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 8> &Regs,
                    const Loop *L);
  void RateFormula(const Formula &F, SmallPtrSet<const SCEV *, 8> &Regs,
                   const Loop *L);

  void Loose();
};

}

/// RateRegister - Tally up interesting quantities from the given register.
void Score::RateRegister(const SCEV *Reg,
                         SmallPtrSet<const SCEV *, 8> &Regs,
                         const Loop *L) {
  if (Regs.insert(Reg))
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      NumPhis += AR->getLoop() == L;

      // Add the step value register, if it needs one.
      if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
        RateRegister(AR->getOperand(1), Regs, L);
    }
}

void Score::RateFormula(const Formula &F,
                        SmallPtrSet<const SCEV *, 8> &Regs,
                        const Loop *L) {
  // Tally up the registers.
  if (F.ScaledReg)
    RateRegister(F.ScaledReg, Regs, L);
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I) {
    const SCEV *BaseReg = *I;
    RateRegister(BaseReg, Regs, L);

    NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
                 BaseReg->hasComputableLoopEvolution(L);
  }

  if (F.BaseRegs.size() > 1)
    NumBaseAdds += F.BaseRegs.size() - 1;

  // Tally up the non-zero immediates.
  if (F.AM.BaseGV || F.AM.BaseOffs != 0)
    ++NumImms;
}

/// Loose - Set this score to a losing value.
void Score::Loose() {
  NumRegs = ~0u;
  NumPhis = ~0u;
  NumIVMuls = ~0u;
  NumBaseAdds = ~0u;
  NumImms = ~0u;
}

/// RateInitial - Compute a score for the initial "fully reduced" solution.
void Score::RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                        ScalarEvolution &SE) {
  SmallPtrSet<const SCEV *, 8> Regs;
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I)
    RateFormula(I->Formulae.front(), Regs, L);
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), 0, Regs));
}

/// Rate - Compute a score for the solution where the reuse associated with
/// putting Reg in a register is selected.
void Score::Rate(const SCEV *Reg, const SmallBitVector &Bits,
                 const SmallVector<LSRUse, 16> &Uses, const Loop *L,
                 ScalarEvolution &SE) {
  SmallPtrSet<const SCEV *, 8> Regs;
  for (size_t i = 0, e = Uses.size(); i != e; ++i) {
    const LSRUse &LU = Uses[i];

    const Formula *BestFormula = 0;
    if (i >= Bits.size() || !Bits.test(i))
      // This use doesn't use the current register. Just go with the current
      // leading candidate formula.
      BestFormula = &LU.Formulae.front();
    else
      // Find the best formula for this use that uses the current register.
      for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
           E = LU.Formulae.end(); I != E; ++I) {
        const Formula &F = *I;
        if (F.referencesReg(Reg) &&
            (!BestFormula || ComplexitySorter()(F, *BestFormula)))
          BestFormula = &F;
      }

    // If we didn't find *any* formulae, because earlier we eliminated some
    // in greedy fashion, skip the current register's reuse opportunity.
    if (!BestFormula) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " wouldn't help any users.\n");
      Loose();
      return;
    }

    // For an in-loop post-inc user, don't allow multiple base registers,
    // because that would require an awkward in-loop add after the increment.
    if (LU.PostIncLoop && LU.PostIncLoop->contains(LU.UserInst) &&
        BestFormula->BaseRegs.size() > 1) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " would require an in-loop post-inc add: ";
            BestFormula->dump());
      Loose();
      return;
    }

    RateFormula(*BestFormula, Regs, L);
  }
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), Reg, Regs));
}

/// operator< - Choose the better score.
bool Score::operator<(const Score &Other) const {
  if (NumRegs != Other.NumRegs)
    return NumRegs < Other.NumRegs;
  if (NumPhis != Other.NumPhis)
    return NumPhis < Other.NumPhis;
  if (NumIVMuls != Other.NumIVMuls)
    return NumIVMuls < Other.NumIVMuls;
  if (NumBaseAdds != Other.NumBaseAdds)
    return NumBaseAdds < Other.NumBaseAdds;
  return NumImms < Other.NumImms;
}
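
// The comparison is lexicographic, so (illustratively) a solution scored as
// 3 regs / 1 phi beats one scored as 3 regs / 2 phis regardless of how the
// remaining fields compare; register count always dominates.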

void Score::print_details(raw_ostream &OS,
                          const SCEV *Reg,
                          const SmallPtrSet<const SCEV *, 8> &Regs) const {
  if (Reg) OS << "Reuse with reg " << *Reg << " would require ";
  else     OS << "The initial solution would require ";
  print(OS);
  OS << ". Regs:";
  for (SmallPtrSet<const SCEV *, 8>::const_iterator I = Regs.begin(),
       E = Regs.end(); I != E; ++I)
    OS << ' ' << **I;
  OS << '\n';
}

void Score::print(raw_ostream &OS) const {
  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
  if (NumPhis != 0)
    OS << ", including " << NumPhis << " PHI" << (NumPhis == 1 ? "" : "s");
  if (NumIVMuls != 0)
    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
  if (NumBaseAdds != 0)
    OS << ", plus " << NumBaseAdds << " base add"
       << (NumBaseAdds == 1 ? "" : "s");
  if (NumImms != 0)
    OS << ", plus " << NumImms << " imm" << (NumImms == 1 ? "" : "s");
}

void Score::dump() const {
  print(errs()); errs() << '\n';
}

/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getOperand(1) == OperandVal)
      isAddress = true;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::prefetch:
    case Intrinsic::x86_sse2_loadu_dq:
    case Intrinsic::x86_sse2_loadu_pd:
    case Intrinsic::x86_sse_loadu_ps:
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      if (II->getOperand(1) == OperandVal)
        isAddress = true;
      break;
    }
  }
  return isAddress;
}

/// getAccessType - Return the type of the memory being accessed.
static const Type *getAccessType(const Instruction *Inst) {
  const Type *AccessTy = Inst->getType();
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
    AccessTy = SI->getOperand(0)->getType();
  else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      AccessTy = II->getOperand(1)->getType();
      break;
    }
  }
  return AccessTy;
}

/// DeleteTriviallyDeadInstructions - If any of the instructions in the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
  bool Changed = false;

  while (!DeadInsts.empty()) {
    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());

    if (I == 0 || !isInstructionTriviallyDead(I))
      continue;

    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
        *OI = 0;
        if (U->use_empty())
          DeadInsts.push_back(U);
      }

    I->eraseFromParent();
    Changed = true;
  }

  return Changed;
}

namespace {

/// LSRInstance - This class holds state for the main loop strength
/// reduction logic.
class LSRInstance {
  IVUsers &IU;
  ScalarEvolution &SE;
  DominatorTree &DT;
  const TargetLowering *const TLI;
  Loop *const L;
  bool Changed;

  /// CurrentArbitraryRegIndex - To ensure a deterministic ordering, assign an
  /// arbitrary index value to each register as a sort tie breaker.
  unsigned CurrentArbitraryRegIndex;

  /// MaxNumRegs - To help prune the search for solutions, identify the number
  /// of registers needed by the initial solution. No formula should require
  /// more than this.
  unsigned MaxNumRegs;

  /// Factors - Interesting factors between use strides.
  SmallSetVector<int64_t, 4> Factors;

  /// Types - Interesting use types, to facilitate truncation reuse.
  SmallSetVector<const Type *, 4> Types;

  /// Uses - The list of interesting uses.
  SmallVector<LSRUse, 16> Uses;

  // TODO: Reorganize these data structures.
  typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
  RegUsesTy RegUses;
  SmallVector<const SCEV *, 16> RegSequence;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                         const SCEV* &CondStride);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
  bool StrideMightBeShared(const SCEV* Stride);
  bool OptimizeLoopTermCond(Instruction *&IVIncInsertPos);

  LSRUse &getNewUse() {
    Uses.push_back(LSRUse());
    return Uses.back();
  }

  void CountRegister(const SCEV *Reg, uint32_t Complexity, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);

  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
                                   Formula Base);
  void GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
                                   Formula Base);
  void GenerateFormulaeFromReplacedBaseReg(LSRUse &LU,
                                           unsigned LUIdx,
                                           const Formula &Base, unsigned i,
                                           const SmallVectorImpl<const SCEV *>
                                             &AddOps);
  void GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
                                  Formula Base);
  void GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
                                Formula Base);
  void GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
                           Formula Base);
  void GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
                             Formula Base);

  void GenerateConstantOffsetReuse();

  void GenerateAllReuseFormulae();

  void GenerateLoopInvariantRegisterUses();

public:
  LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);

  bool getChanged() const { return Changed; }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}
1458
Devang Patela0b39092008-08-26 17:57:54 +00001459/// OptimizeShadowIV - If IV is used in a int-to-float cast
1460/// inside the loop then try to eliminate the cast opeation.
Dan Gohmana10756e2010-01-21 02:09:26 +00001461void LSRInstance::OptimizeShadowIV() {
1462 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
Dan Gohman46bdfb02009-02-24 18:55:53 +00001463 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
Devang Patela0b39092008-08-26 17:57:54 +00001464 return;
Jim Grosbach56a1f802009-11-17 17:53:56 +00001465
Dan Gohmana10756e2010-01-21 02:09:26 +00001466 for (size_t StrideIdx = 0, e = IU.StrideOrder.size();
1467 StrideIdx != e; ++StrideIdx) {
Dan Gohman0bba49c2009-07-07 17:06:11 +00001468 std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
Dan Gohmana10756e2010-01-21 02:09:26 +00001469 IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
1470 assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
Devang Patela0b39092008-08-26 17:57:54 +00001471 if (!isa<SCEVConstant>(SI->first))
1472 continue;
1473
Dan Gohman81db61a2009-05-12 02:17:14 +00001474 for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
1475 E = SI->second->Users.end(); UI != E; /* empty */) {
1476 ilist<IVStrideUse>::iterator CandidateUI = UI;
Devang Patel54153272008-08-27 17:50:18 +00001477 ++UI;
Dan Gohman81db61a2009-05-12 02:17:14 +00001478 Instruction *ShadowUse = CandidateUI->getUser();
Devang Patela0b39092008-08-26 17:57:54 +00001479 const Type *DestTy = NULL;
1480
1481 /* If the shadow use is an int->float cast, then insert a second IV
Devang Patel54153272008-08-27 17:50:18 +00001482 to eliminate this cast.
Devang Patela0b39092008-08-26 17:57:54 +00001483
Jim Grosbach56a1f802009-11-17 17:53:56 +00001484 for (unsigned i = 0; i < n; ++i)
Devang Patela0b39092008-08-26 17:57:54 +00001485 foo((double)i);
1486
Devang Patel54153272008-08-27 17:50:18 +00001487 is transformed into
Devang Patela0b39092008-08-26 17:57:54 +00001488
1489 double d = 0.0;
Jim Grosbach56a1f802009-11-17 17:53:56 +00001490 for (unsigned i = 0; i < n; ++i, ++d)
Devang Patela0b39092008-08-26 17:57:54 +00001491 foo(d);
1492 */
Dan Gohman81db61a2009-05-12 02:17:14 +00001493 if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
Devang Patela0b39092008-08-26 17:57:54 +00001494 DestTy = UCast->getDestTy();
Dan Gohman81db61a2009-05-12 02:17:14 +00001495 else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
Devang Patela0b39092008-08-26 17:57:54 +00001496 DestTy = SCast->getDestTy();
Devang Patel18bb2782008-08-27 20:55:23 +00001497 if (!DestTy) continue;
1498
1499 if (TLI) {
Evan Cheng5792f512009-05-11 22:33:01 +00001500 // If the target does not support DestTy natively, then do not apply
1501 // this transformation.
Owen Andersone50ed302009-08-10 22:56:29 +00001502 EVT DVT = TLI->getValueType(DestTy);
Devang Patel18bb2782008-08-27 20:55:23 +00001503 if (!TLI->isTypeLegal(DVT)) continue;
1504 }
1505
Devang Patela0b39092008-08-26 17:57:54 +00001506 PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
1507 if (!PH) continue;
1508 if (PH->getNumIncomingValues() != 2) continue;
1509
1510 const Type *SrcTy = PH->getType();
1511 int Mantissa = DestTy->getFPMantissaWidth();
Jim Grosbach56a1f802009-11-17 17:53:56 +00001512 if (Mantissa == -1) continue;
Dan Gohmana10756e2010-01-21 02:09:26 +00001513 if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
Devang Patela0b39092008-08-26 17:57:54 +00001514 continue;
1515
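      // Determine which of the PHI's incoming values comes from the
      // preheader (Entry) and which comes from the latch (Latch).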
1516 unsigned Entry, Latch;
1517 if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
1518 Entry = 0;
1519 Latch = 1;
1520 } else {
1521 Entry = 1;
1522 Latch = 0;
1523 }
Jim Grosbach56a1f802009-11-17 17:53:56 +00001524
Devang Patela0b39092008-08-26 17:57:54 +00001525 ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
1526 if (!Init) continue;
Owen Anderson6f83c9c2009-07-27 20:59:43 +00001527 Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
Devang Patela0b39092008-08-26 17:57:54 +00001528
Jim Grosbach56a1f802009-11-17 17:53:56 +00001529 BinaryOperator *Incr =
Devang Patela0b39092008-08-26 17:57:54 +00001530 dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
1531 if (!Incr) continue;
1532 if (Incr->getOpcode() != Instruction::Add
1533 && Incr->getOpcode() != Instruction::Sub)
1534 continue;
1535
1536 /* Initialize new IV, double d = 0.0 in above example. */
1537 ConstantInt *C = NULL;
1538 if (Incr->getOperand(0) == PH)
1539 C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1540 else if (Incr->getOperand(1) == PH)
1541 C = dyn_cast<ConstantInt>(Incr->getOperand(0));
1542 else
1543 continue;
1544
1545 if (!C) continue;
1546
Dan Gohmana8225082009-10-26 15:32:57 +00001547 // Ignore negative constants, as the code below doesn't handle them
1548 // correctly. TODO: Remove this restriction.
1549 if (!C->getValue().isStrictlyPositive()) continue;
1550
Devang Patela0b39092008-08-26 17:57:54 +00001551 /* Add new PHINode. */
1552 PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
1553
Devang Patel54153272008-08-27 17:50:18 +00001554 /* Create the new increment, '++d' in the above example. */
Owen Anderson6f83c9c2009-07-27 20:59:43 +00001555 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
Jim Grosbach56a1f802009-11-17 17:53:56 +00001556 BinaryOperator *NewIncr =
Dan Gohmanae3a0be2009-06-04 22:49:04 +00001557 BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
1558 Instruction::FAdd : Instruction::FSub,
Devang Patela0b39092008-08-26 17:57:54 +00001559 NewPH, CFP, "IV.S.next.", Incr);
1560
1561 NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
1562 NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
1563
1564 /* Remove cast operation */
Devang Patela0b39092008-08-26 17:57:54 +00001565 ShadowUse->replaceAllUsesWith(NewPH);
1566 ShadowUse->eraseFromParent();
Devang Patela0b39092008-08-26 17:57:54 +00001567 break;
1568 }
1569 }
1570}
1571
Dan Gohmana10756e2010-01-21 02:09:26 +00001572/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
1573/// set the IV user and stride information and return true, otherwise return
1574/// false.
1575bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
1576 IVStrideUse *&CondUse,
1577 const SCEV* &CondStride) {
1578 for (unsigned StrideIdx = 0, e = IU.StrideOrder.size();
1579 StrideIdx != e && !CondUse; ++StrideIdx) {
1580 std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
1581 IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
1582 assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
Chris Lattner010de252005-08-08 05:28:22 +00001583
Dan Gohmana10756e2010-01-21 02:09:26 +00001584 for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
1585 E = SI->second->Users.end(); UI != E; ++UI)
1586 if (UI->getUser() == Cond) {
1587 // NOTE: We could handle setcc instructions with multiple uses here, but
1588 // since InstCombine handles the simple cases, it's not clear that this
1589 // occurs often enough in real life to be worth handling.
1590 CondUse = UI;
1591 CondStride = SI->first;
1592 return true;
1593 }
1594 }
1595 return false;
Evan Cheng2d850522009-05-09 01:08:24 +00001596}
1597
Dan Gohmana10756e2010-01-21 02:09:26 +00001598/// OptimizeMax - Rewrite the loop's terminating condition if it uses
1599/// a max computation.
1600///
1601/// This is a narrow solution to a specific, but acute, problem. For loops
1602/// like this:
1603///
1604/// i = 0;
1605/// do {
1606/// p[i] = 0.0;
1607/// } while (++i < n);
1608///
1609/// the trip count isn't just 'n', because 'n' might not be positive. And
1610/// unfortunately this can come up even for loops where the user didn't use
1611/// a C do-while loop. For example, seemingly well-behaved top-test loops
1612/// will commonly be lowered like this:
1613///
1614/// if (n > 0) {
1615/// i = 0;
1616/// do {
1617/// p[i] = 0.0;
1618/// } while (++i < n);
1619/// }
1620///
1621/// and then it's possible for subsequent optimization to obscure the if
1622/// test in such a way that indvars can't find it.
1623///
1624/// When indvars can't find the if test in loops like this, it creates a
1625/// max expression, which allows it to give the loop a canonical
1626/// induction variable:
1627///
1628/// i = 0;
1629/// max = n < 1 ? 1 : n;
1630/// do {
1631/// p[i] = 0.0;
1632/// } while (++i != max);
1633///
1634/// Canonical induction variables are necessary because the loop passes
1635/// are designed around them. The most obvious example of this is the
1636/// LoopInfo analysis, which doesn't remember trip count values. It
1637/// expects to be able to rediscover the trip count each time it is
1638/// needed, and it does this using a simple analysis that only succeeds if
1639/// the loop has a canonical induction variable.
1640///
1641/// However, when it comes time to generate code, the maximum operation
1642/// can be quite costly, especially if it's inside of an outer loop.
1643///
1644/// This function solves this problem by detecting loops of this type and
1645/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
1646/// the instructions for the maximum computation.
1647///
1648ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
1649 // Check that the loop matches the pattern we're looking for.
1650 if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
1651 Cond->getPredicate() != CmpInst::ICMP_NE)
1652 return Cond;
1653
1654 SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
1655 if (!Sel || !Sel->hasOneUse()) return Cond;
1656
1657 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1658 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1659 return Cond;
1660 const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType());
1661
1662 // Add one to the backedge-taken count to get the trip count.
1663 const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
1664
1665 // Check for a max calculation that matches the pattern.
1666 if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
1667 return Cond;
1668 const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
1669 if (Max != SE.getSCEV(Sel)) return Cond;
1670
1671 // To handle a max with more than two operands, this optimization would
1672 // require additional checking and setup.
1673 if (Max->getNumOperands() != 2)
1674 return Cond;
1675
1676 const SCEV *MaxLHS = Max->getOperand(0);
1677 const SCEV *MaxRHS = Max->getOperand(1);
1678 if (!MaxLHS || MaxLHS != One) return Cond;
1679 // Check the relevant induction variable for conformance to
1680 // the pattern.
1681 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
1682 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
1683 if (!AR || !AR->isAffine() ||
1684 AR->getStart() != One ||
1685 AR->getStepRecurrence(SE) != One)
1686 return Cond;
1687
1688 assert(AR->getLoop() == L &&
1689 "Loop condition operand is an addrec in a different loop!");
1690
1691 // Check the right operand of the select, and remember it, as it will
1692 // be used in the new comparison instruction.
1693 Value *NewRHS = 0;
1694 if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
1695 NewRHS = Sel->getOperand(1);
1696 else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
1697 NewRHS = Sel->getOperand(2);
1698 if (!NewRHS) return Cond;
1699
1700 // Determine the new comparison opcode. It may be signed or unsigned,
1701 // and the original comparison may be either equality or inequality.
1702 CmpInst::Predicate Pred =
1703 isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
1704 if (Cond->getPredicate() == CmpInst::ICMP_EQ)
1705 Pred = CmpInst::getInversePredicate(Pred);
1706
1707 // Ok, everything looks ok to change the condition into an SLT or SGE and
1708 // delete the max calculation.
1709 ICmpInst *NewCond =
1710 new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
1711
1712 // Delete the max calculation instructions.
1713 Cond->replaceAllUsesWith(NewCond);
1714 CondUse->setUser(NewCond);
1715 Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
1716 Cond->eraseFromParent();
1717 Sel->eraseFromParent();
1718 if (Cmp->use_empty())
1719 Cmp->eraseFromParent();
1720 return NewCond;
1721}
1722
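/// StrideMightBeShared - Determine whether the given constant stride might
/// be shared by users of another interesting stride, either because the two
/// strides are equal or because the other stride is a multiple of it that is
/// reachable through a legal addressing-mode scale for a pre-incremented use.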
1723bool LSRInstance::StrideMightBeShared(const SCEV* Stride) {
Evan Cheng586f69a2009-11-12 07:35:05 +00001724 int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
Dan Gohmana10756e2010-01-21 02:09:26 +00001725 for (unsigned i = 0, e = IU.StrideOrder.size(); i != e; ++i) {
Evan Cheng586f69a2009-11-12 07:35:05 +00001726 std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
Dan Gohmana10756e2010-01-21 02:09:26 +00001727 IU.IVUsesByStride.find(IU.StrideOrder[i]);
Evan Cheng586f69a2009-11-12 07:35:05 +00001728 const SCEV *Share = SI->first;
1729 if (!isa<SCEVConstant>(SI->first) || Share == Stride)
1730 continue;
1731 int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
1732 if (SSInt == SInt)
1733 return true; // This can definitely be reused.
1734 if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
1735 continue;
1736 int64_t Scale = SSInt / SInt;
Dan Gohmana10756e2010-01-21 02:09:26 +00001737
1738 // This AM will be used for conservative queries. At this point in the
1739 // process we don't know which users will have a base reg, immediate,
1740 // etc., so we conservatively assume that it may not, making more
1741 // strides valid, thus erring on the side of assuming that there
1742 // might be sharing.
1743 TargetLowering::AddrMode AM;
1744 AM.Scale = Scale;
1745
1746 // Any pre-inc iv use?
1747 IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[Share];
1748 for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
1749 E = StrideUses.Users.end(); I != E; ++I) {
1750 bool isAddress = isAddressUse(I->getUser(), I->getOperandValToReplace());
1751 if (!I->isUseOfPostIncrementedValue() &&
1752 isLegalUse(AM, isAddress ? LSRUse::Address : LSRUse::Basic,
1753 isAddress ? getAccessType(I->getUser()) : 0,
1754 TLI))
Evan Cheng586f69a2009-11-12 07:35:05 +00001755 return true;
Evan Cheng5792f512009-05-11 22:33:01 +00001756 }
Evan Cheng5792f512009-05-11 22:33:01 +00001757 }
Evan Cheng586f69a2009-11-12 07:35:05 +00001758 return false;
Chris Lattner010de252005-08-08 05:28:22 +00001759}
Nate Begeman16997482005-07-30 00:15:07 +00001760
Jim Grosbach56a1f802009-11-17 17:53:56 +00001761/// OptimizeLoopTermCond - Change loop terminating condition to use the
Evan Cheng586f69a2009-11-12 07:35:05 +00001762/// postinc iv when possible.
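///
/// For example, rewriting a latch test to compare against %i.next rather
/// than %i lets the live range of %i end at the increment, so %i and
/// %i.next can be coalesced into a single register.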
Dan Gohmana10756e2010-01-21 02:09:26 +00001763bool
1764LSRInstance::OptimizeLoopTermCond(Instruction *&IVIncInsertPos) {
1765 SmallPtrSet<Instruction *, 4> PostIncs;
1766
Evan Cheng586f69a2009-11-12 07:35:05 +00001767 BasicBlock *LatchBlock = L->getLoopLatch();
Evan Cheng076e0852009-11-17 18:10:11 +00001768 SmallVector<BasicBlock*, 8> ExitingBlocks;
1769 L->getExitingBlocks(ExitingBlocks);
Jim Grosbach56a1f802009-11-17 17:53:56 +00001770
Evan Cheng076e0852009-11-17 18:10:11 +00001771 for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
1772 BasicBlock *ExitingBlock = ExitingBlocks[i];
Evan Cheng586f69a2009-11-12 07:35:05 +00001773
Dan Gohmana10756e2010-01-21 02:09:26 +00001774 // Get the terminating condition for the loop if possible. If we
Evan Cheng076e0852009-11-17 18:10:11 +00001775 // can, we want to change it to use a post-incremented version of its
1776 // induction variable, to allow coalescing the live ranges for the IV into
1777 // one register value.
Evan Cheng586f69a2009-11-12 07:35:05 +00001778
Evan Cheng076e0852009-11-17 18:10:11 +00001779 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
1780 if (!TermBr)
1781 continue;
1782 // FIXME: Overly conservative; the termination condition could be an 'or' etc.
1783 if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
1784 continue;
Evan Cheng586f69a2009-11-12 07:35:05 +00001785
Evan Cheng076e0852009-11-17 18:10:11 +00001786 // Search IVUsesByStride to find Cond's IVUse if there is one.
1787 IVStrideUse *CondUse = 0;
1788 const SCEV *CondStride = 0;
1789 ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
1790 if (!FindIVUserForCond(Cond, CondUse, CondStride))
1791 continue;
1792
Evan Cheng076e0852009-11-17 18:10:11 +00001793 // If the trip count is computed in terms of a max (due to ScalarEvolution
1794 // being unable to find a sufficient guard, for example), change the loop
1795 // comparison to use SLT or ULT instead of NE.
Dan Gohmana10756e2010-01-21 02:09:26 +00001796 // One consequence of doing this now is that it disrupts the count-down
1797 // optimization. That's not always a bad thing though, because in such
1798 // cases it may still be worthwhile to avoid a max.
1799 Cond = OptimizeMax(Cond, CondUse);
Evan Cheng076e0852009-11-17 18:10:11 +00001800
Dan Gohmana10756e2010-01-21 02:09:26 +00001801 // If this exiting block is the latch block, and the condition has only
1802 // one use inside the loop (the branch), use the post-incremented value
1803 // of the induction variable.
1804 if (ExitingBlock != LatchBlock) {
1805 // If this exiting block dominates the latch block, it may also use
1806 // the post-inc value if it won't be shared with other uses.
1807 // Check for dominance.
1808 if (!DT.dominates(ExitingBlock, LatchBlock))
1809 continue;
1810 // Check for sharing within the same stride.
1811 bool SameStrideSharing = false;
1812 IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[CondStride];
1813 for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
1814 E = StrideUses.Users.end(); I != E; ++I) {
1815 if (I->getUser() == Cond)
1816 continue;
1817 if (!I->isUseOfPostIncrementedValue()) {
1818 SameStrideSharing = true;
1819 break;
1820 }
1821 }
1822 if (SameStrideSharing)
1823 continue;
1824 // Check for sharing from a different stride.
1825 if (isa<SCEVConstant>(CondStride) && StrideMightBeShared(CondStride))
1826 continue;
1827 }
1828 if (!Cond->hasOneUse()) {
1829 bool HasOneUseInLoop = true;
1830 for (Value::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
1831 UI != UE; ++UI) {
1832 Instruction *U = cast<Instruction>(*UI);
1833 if (U == TermBr)
1834 continue;
1835 if (L->contains(U)) {
1836 HasOneUseInLoop = false;
1837 break;
1838 }
1839 }
1840 if (!HasOneUseInLoop)
1841 continue;
Evan Cheng076e0852009-11-17 18:10:11 +00001842 }
1843
David Greene63c94632009-12-23 22:58:38 +00001844 DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
Dan Gohmana10756e2010-01-21 02:09:26 +00001845 << *Cond << '\n');
Evan Cheng076e0852009-11-17 18:10:11 +00001846
1847 // It's possible for the setcc instruction to be anywhere in the loop, and
1848 // possible for it to have multiple users. If it is not immediately before
1849 // the exiting block branch, move it.
Dan Gohmana10756e2010-01-21 02:09:26 +00001850 if (&*++BasicBlock::iterator(Cond) != TermBr) {
Evan Cheng076e0852009-11-17 18:10:11 +00001851 if (Cond->hasOneUse()) { // Condition has a single use, just move it.
1852 Cond->moveBefore(TermBr);
1853 } else {
1854 // Otherwise, clone the terminating condition and insert into the
1855 // loop end.
Dan Gohmana10756e2010-01-21 02:09:26 +00001856 ICmpInst *OldCond = Cond;
Evan Cheng076e0852009-11-17 18:10:11 +00001857 Cond = cast<ICmpInst>(Cond->clone());
1858 Cond->setName(L->getHeader()->getName() + ".termcond");
1859 ExitingBlock->getInstList().insert(TermBr, Cond);
1860
1861 // Clone the IVUse, as the old use still exists!
Dan Gohmana10756e2010-01-21 02:09:26 +00001862 IU.IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
Evan Cheng076e0852009-11-17 18:10:11 +00001863 CondUse->getOperandValToReplace());
Dan Gohmana10756e2010-01-21 02:09:26 +00001864 CondUse = &IU.IVUsesByStride[CondStride]->Users.back();
1865 TermBr->replaceUsesOfWith(OldCond, Cond);
Evan Cheng076e0852009-11-17 18:10:11 +00001866 }
Evan Cheng586f69a2009-11-12 07:35:05 +00001867 }
1868
Evan Cheng076e0852009-11-17 18:10:11 +00001869 // If we get to here, we know that we can transform the setcc instruction to
1870 // use the post-incremented version of the IV, allowing us to coalesce the
1871 // live ranges for the IV correctly.
Dan Gohmana10756e2010-01-21 02:09:26 +00001872 CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), CondStride));
Evan Cheng076e0852009-11-17 18:10:11 +00001873 CondUse->setIsUseOfPostIncrementedValue(true);
1874 Changed = true;
1875
Dan Gohmana10756e2010-01-21 02:09:26 +00001876 PostIncs.insert(Cond);
1877 }
1878
1879 // Determine an insertion point for the loop induction variable increment. It
1880 // must dominate all the post-inc comparisons we just set up, and it must
1881 // dominate the loop latch edge.
1882 IVIncInsertPos = L->getLoopLatch()->getTerminator();
1883 for (SmallPtrSet<Instruction *, 4>::iterator I = PostIncs.begin(),
1884 E = PostIncs.end(); I != E; ++I) {
1885 BasicBlock *BB =
1886 DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
1887 (*I)->getParent());
1888 if (BB == (*I)->getParent())
1889 IVIncInsertPos = *I;
1890 else if (BB != IVIncInsertPos->getParent())
1891 IVIncInsertPos = BB->getTerminator();
1892 }
1893
1894 return Changed;
1895}
1896
1897/// CountRegister - Note the given register.
1898void LSRInstance::CountRegister(const SCEV *Reg, uint32_t Complexity,
1899 size_t LUIdx) {
1900 std::pair<RegUsesTy::iterator, bool> Pair =
1901 RegUses.insert(std::make_pair(Reg, RegSortData()));
1902 RegSortData &BV = Pair.first->second;
1903 if (Pair.second) {
1904 BV.Index = CurrentArbitraryRegIndex++;
1905 BV.MaxComplexity = Complexity;
1906 RegSequence.push_back(Reg);
1907 } else {
1908 BV.MaxComplexity = std::max(BV.MaxComplexity, Complexity);
1909 }
1910 BV.Bits.resize(std::max(BV.Bits.size(), LUIdx + 1));
1911 BV.Bits.set(LUIdx);
1912}
1913
1914/// CountRegisters - Note which registers are used by the given formula,
1915/// updating RegUses.
1916void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
1917 uint32_t Complexity = F.getComplexity();
1918 if (F.ScaledReg)
1919 CountRegister(F.ScaledReg, Complexity, LUIdx);
1920 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
1921 E = F.BaseRegs.end(); I != E; ++I)
1922 CountRegister(*I, Complexity, LUIdx);
1923}
1924
Dan Gohman8b0ade32010-01-21 22:42:49 +00001925/// InsertFormula - If the given formula has not yet been inserted, add it
1926/// to the list, and return true. Return false otherwise.
1927bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
1928 // If a formula by itself would require more registers than the base solution,
1929 // discard it and stop searching from it, as it won't be profitable. This is
1930 // actually more permissive than it could be, because it doesn't include
1931 // registers used by non-constant strides in F.
1932 if (F.getNumRegs() > MaxNumRegs)
1933 return false;
1934
1935 if (!LU.InsertFormula(F))
1936 return false;
1937
1938 CountRegisters(LU.Formulae.back(), LUIdx);
1939 return true;
1940}
1941
Dan Gohmana10756e2010-01-21 02:09:26 +00001942/// GenerateSymbolicOffsetReuse - Generate reuse formulae using symbolic
1943/// offsets.
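///
/// For example, a base register such as (@G + %x) can be split so that @G
/// becomes the AM.BaseGV symbolic offset while %x remains a register,
/// exposing global-plus-register addressing modes where they are legal.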
1944void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
1945 Formula Base) {
1946 // We can't add a symbolic offset if the address already contains one.
1947 if (Base.AM.BaseGV) return;
1948
1949 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
1950 const SCEV *G = Base.BaseRegs[i];
1951 GlobalValue *GV = ExtractSymbol(G, SE);
1952 if (G->isZero() || !GV)
1953 continue;
1954 Formula F = Base;
1955 F.AM.BaseGV = GV;
1956 if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
1957 continue;
1958 F.BaseRegs[i] = G;
Dan Gohman8b0ade32010-01-21 22:42:49 +00001959 (void)InsertFormula(LU, LUIdx, F);
Evan Cheng586f69a2009-11-12 07:35:05 +00001960 }
Evan Cheng586f69a2009-11-12 07:35:05 +00001961}
1962
Dan Gohmana10756e2010-01-21 02:09:26 +00001963/// GenerateICmpZeroScaledReuse - For ICmpZero, check to see if we can scale up
1964/// the comparison. For example, x == y -> x*c == y*c.
1965void LSRInstance::GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
1966 Formula Base) {
1967 if (LU.Kind != LSRUse::ICmpZero) return;
Evan Cheng586f69a2009-11-12 07:35:05 +00001968
Dan Gohmana10756e2010-01-21 02:09:26 +00001969 // Determine the integer type for the base formula.
1970 const Type *IntTy = Base.getType();
1971 if (!IntTy) return;
1972 if (SE.getTypeSizeInBits(IntTy) > 64) return;
1973 IntTy = SE.getEffectiveSCEVType(IntTy);
Evan Cheng586f69a2009-11-12 07:35:05 +00001974
Dan Gohmana10756e2010-01-21 02:09:26 +00001975 assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00001976
Dan Gohmana10756e2010-01-21 02:09:26 +00001977 // Check each interesting stride.
1978 for (SmallSetVector<int64_t, 4>::const_iterator
1979 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
1980 int64_t Factor = *I;
1981 Formula F = Base;
1982
1983 // Check that the multiplication doesn't overflow.
1984 F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs * Factor;
1985 if ((int64_t)F.AM.BaseOffs / Factor != F.AM.BaseOffs)
1986 continue;
1987
1988 // Check that this scale is legal.
1989 if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
1990 continue;
1991
1992 const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));
1993
1994 // Check that multiplying with each base register doesn't overflow.
1995 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
1996 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
1997 if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
1998 goto next;
1999 }
2000
2001 // Check that multiplying with the scaled register doesn't overflow.
2002 if (F.ScaledReg) {
2003 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
2004 if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
2005 continue;
2006 }
2007
2008 // If we make it here and it's legal, add it.
Dan Gohman8b0ade32010-01-21 22:42:49 +00002009 (void)InsertFormula(LU, LUIdx, F);
Dan Gohmana10756e2010-01-21 02:09:26 +00002010 next:;
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002011 }
Dan Gohmana10756e2010-01-21 02:09:26 +00002012}
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002013
Dan Gohmana10756e2010-01-21 02:09:26 +00002014/// GenerateFormulaeFromReplacedBaseReg - If removing the base register with
2015/// index i from the BaseRegs list and adding the registers in AddOps
2016/// to the address forms an interesting formula, pursue it.
2017void
2018LSRInstance::GenerateFormulaeFromReplacedBaseReg(
2019 LSRUse &LU,
2020 unsigned LUIdx,
2021 const Formula &Base, unsigned i,
2022 const SmallVectorImpl<const SCEV *>
2023 &AddOps) {
2024 if (AddOps.empty()) return;
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002025
Dan Gohmana10756e2010-01-21 02:09:26 +00002026 Formula F = Base;
2027 std::swap(F.BaseRegs[i], F.BaseRegs.back());
2028 F.BaseRegs.pop_back();
2029 for (SmallVectorImpl<const SCEV *>::const_iterator I = AddOps.begin(),
2030 E = AddOps.end(); I != E; ++I)
2031 F.BaseRegs.push_back(*I);
2032 F.AM.HasBaseReg = !F.BaseRegs.empty();
Dan Gohman8b0ade32010-01-21 22:42:49 +00002033 if (InsertFormula(LU, LUIdx, F))
2034 // If that formula hadn't been seen before, recurse to find more like it.
Dan Gohmana10756e2010-01-21 02:09:26 +00002035 GenerateReassociationReuse(LU, LUIdx, LU.Formulae.back());
Dan Gohmana10756e2010-01-21 02:09:26 +00002036}
Evan Cheng81ebdcf2009-11-10 21:14:05 +00002037
Dan Gohmana10756e2010-01-21 02:09:26 +00002038/// GenerateReassociationReuse - Split out subexpressions from adds and
2039/// the bases of addrecs.
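///
/// For example, an add base register (a + b + c) can be split into the
/// registers a and (b + c), and an addrec base register {(a + b),+,s} can
/// be split into the registers a and {b,+,s}, possibly exposing more reuse.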
2040void LSRInstance::GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
2041 Formula Base) {
2042 SmallVector<const SCEV *, 8> AddOps;
2043 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
2044 const SCEV *BaseReg = Base.BaseRegs[i];
2045 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BaseReg)) {
2046 for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
2047 J != JE; ++J) {
2048 // Don't pull a constant into a register if the constant could be
2049 // folded into an immediate field.
2050 if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE)) continue;
2051 SmallVector<const SCEV *, 8> InnerAddOps;
2052 for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
2053 if (K != J)
2054 InnerAddOps.push_back(*K);
2055 // Splitting a 2-operand add both ways is redundant. Pruning this
2056 // now saves compile time.
2057 if (InnerAddOps.size() < 2 && next(J) == JE)
2058 continue;
2059 AddOps.push_back(*J);
2060 const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
2061 AddOps.push_back(InnerAdd);
2062 GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
2063 AddOps.clear();
2064 }
2065 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BaseReg)) {
2066 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(AR->getStart())) {
2067 for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
2068 J != JE; ++J) {
2069 // Don't pull a constant into a register if the constant could be
2070 // folded into an immediate field.
2071 if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE))
2072 continue;
2073 SmallVector<const SCEV *, 8> InnerAddOps;
2074 for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
2075 if (K != J)
2076 InnerAddOps.push_back(*K);
2077 AddOps.push_back(*J);
2078 const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
2079 AddOps.push_back(SE.getAddRecExpr(InnerAdd,
2080 AR->getStepRecurrence(SE),
2081 AR->getLoop()));
2082 GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
2083 AddOps.clear();
2084 }
2085 } else if (!isAlwaysFoldable(AR->getStart(), Base.BaseRegs.size() > 1,
2086 LU.Kind, LU.AccessTy,
2087 TLI, SE)) {
2088 AddOps.push_back(AR->getStart());
2089 AddOps.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0,
2090 BaseReg->getType()),
2091 AR->getStepRecurrence(SE),
2092 AR->getLoop()));
2093 GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
2094 AddOps.clear();
2095 }
2096 }
2097 }
2098}
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002099
Dan Gohmana10756e2010-01-21 02:09:26 +00002100/// GenerateCombinationReuse - Generate a formula consisting of all of the
2101/// loop-invariant registers combined into a single register.
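///
/// For example, a formula with base registers a, b, and {0,+,1}, where a
/// and b are loop-invariant, can be rewritten to use (a + b) and {0,+,1},
/// requiring one fewer register.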
2102void LSRInstance::GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
2103 Formula Base) {
2104 if (Base.BaseRegs.size() <= 1) return;
2105
2106 Formula F = Base;
2107 F.BaseRegs.clear();
2108 SmallVector<const SCEV *, 4> Ops;
2109 for (SmallVectorImpl<const SCEV *>::const_iterator
2110 I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
2111 const SCEV *BaseReg = *I;
2112 if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
2113 !BaseReg->hasComputableLoopEvolution(L))
2114 Ops.push_back(BaseReg);
2115 else
2116 F.BaseRegs.push_back(BaseReg);
2117 }
2118 if (Ops.size() > 1) {
2119 F.BaseRegs.push_back(SE.getAddExpr(Ops));
Dan Gohman8b0ade32010-01-21 22:42:49 +00002120 (void)InsertFormula(LU, LUIdx, F);
Dan Gohmana10756e2010-01-21 02:09:26 +00002121 }
2122}
2123
2124/// GenerateScaledReuse - Generate stride factor reuse formulae by making
2125/// use of scaled-offset address modes, for example.
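///
/// For example, with an interesting factor of 4, an addrec base register
/// {0,+,4} can be divided through to form the scaled register {0,+,1} with
/// AM.Scale = 4, matching base + 4*index addressing on targets where that
/// mode is legal.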
2126void LSRInstance::GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
2127 Formula Base) {
2128 // Determine the integer type for the base formula.
2129 const Type *IntTy = Base.getType();
2130 if (!IntTy) return;
2131 IntTy = SE.getEffectiveSCEVType(IntTy);
2132
2133 // Check each interesting stride.
2134 for (SmallSetVector<int64_t, 4>::const_iterator
2135 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
2136 int64_t Factor = *I;
2137
2138 // If this Formula already has a scaled register, we can't add another one.
2139 if (Base.AM.Scale != 0)
2140 continue;
2141 Formula F = Base;
2142 F.AM.Scale = Factor;
2143 // Check whether this scale is going to be legal.
2144 if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI)) {
2145 // As a special case, reclassify out-of-loop Basic users as Special users.
2146 // TODO: Reconsider this special case.
2147 if (LU.Kind == LSRUse::Basic &&
2148 isLegalUse(F.AM, LSRUse::Special, LU.AccessTy, TLI) &&
2149 !L->contains(LU.UserInst))
2150 LU.Kind = LSRUse::Special;
2151 else
2152 continue;
2153 }
2154 // For each addrec base reg, apply the scale, if possible.
2155 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
2156 if (const SCEVAddRecExpr *AR =
2157 dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
2158 const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));
2159 // Divide out the factor, ignoring high bits, since we'll be
2160 // scaling the value back up in the end.
2161 if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
2162 // TODO: This could be optimized to avoid all the copying.
2163 Formula NewF = F;
2164 NewF.ScaledReg = Quotient;
2165 std::swap(NewF.BaseRegs[i], NewF.BaseRegs.back());
2166 NewF.BaseRegs.pop_back();
2167 NewF.AM.HasBaseReg = !NewF.BaseRegs.empty();
Dan Gohman8b0ade32010-01-21 22:42:49 +00002168 (void)InsertFormula(LU, LUIdx, NewF);
Dan Gohmana10756e2010-01-21 02:09:26 +00002169 }
Evan Cheng586f69a2009-11-12 07:35:05 +00002170 }
2171 }
Evan Cheng586f69a2009-11-12 07:35:05 +00002172}
2173
Dan Gohmana10756e2010-01-21 02:09:26 +00002174/// GenerateTruncateReuse - Generate reuse formulae from different IV types.
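///
/// For example, if the target can truncate i64 to i32 for free, an i32 use
/// can be reformulated in terms of anyext'd i64 registers, allowing it to
/// share a wider induction variable.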
2175void LSRInstance::GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
2176 Formula Base) {
2177 // This requires TargetLowering to tell us which truncates are free.
2178 if (!TLI) return;
Evan Cheng586f69a2009-11-12 07:35:05 +00002179
Dan Gohmana10756e2010-01-21 02:09:26 +00002180 // Don't attempt to truncate symbolic values.
2181 if (Base.AM.BaseGV) return;
2182
2183 // Determine the integer type for the base formula.
2184 const Type *DstTy = Base.getType();
2185 if (!DstTy) return;
2186 DstTy = SE.getEffectiveSCEVType(DstTy);
2187
2188 for (SmallSetVector<const Type *, 4>::const_iterator
2189 I = Types.begin(), E = Types.end(); I != E; ++I) {
2190 const Type *SrcTy = *I;
2191 if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
2192 Formula F = Base;
2193 if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
2194 for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
2195 JE = F.BaseRegs.end(); J != JE; ++J)
2196 *J = SE.getAnyExtendExpr(*J, SrcTy);
Dan Gohman8b0ade32010-01-21 22:42:49 +00002197 (void)InsertFormula(LU, LUIdx, F);
Dan Gohmana10756e2010-01-21 02:09:26 +00002198 }
2199 }
2200}
2201
2202namespace {
2203
2204/// WorkItem - Helper class for GenerateConstantOffsetReuse. It's used to
2205/// defer modifications so that the search phase doesn't have to worry about
2206/// the data structures moving underneath it.
2207struct WorkItem {
2208 LSRUse *LU;
2209 size_t LUIdx;
2210 int64_t Imm;
2211 const SCEV *OrigReg;
2212
2213 WorkItem(LSRUse *U, size_t LI, int64_t I, const SCEV *R)
2214 : LU(U), LUIdx(LI), Imm(I), OrigReg(R) {}
2215
2216 void print(raw_ostream &OS) const;
2217 void dump() const;
2218};
2219
2220void WorkItem::print(raw_ostream &OS) const {
2221 OS << "in use ";
2222 LU->print(OS);
2223 OS << " (at index " << LUIdx << "), add offset " << Imm
2224 << " and compensate by adjusting refences to " << *OrigReg << "\n";
2225}
2226
2227void WorkItem::dump() const {
2228 print(errs()); errs() << '\n';
2229}
2230
2231}
2232
2233/// GenerateConstantOffsetReuse - Look for registers which are a constant
2234/// distance apart and try to form reuse opportunities between them.
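///
/// For example, if the registers {a,+,s} and {(a + 4),+,s} are both in use,
/// uses of the latter can be rewritten in terms of {a,+,s} by folding the
/// constant 4 into the use's immediate offset.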
2235void LSRInstance::GenerateConstantOffsetReuse() {
2236 // Group the registers by their value without any added constant offset.
2237 typedef std::map<int64_t, const SCEV *> ImmMapTy;
2238 typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
2239 RegMapTy Map;
2240 SmallVector<const SCEV *, 8> Sequence;
2241 for (SmallVectorImpl<const SCEV *>::iterator I = RegSequence.begin(),
2242 E = RegSequence.end(); I != E; ++I) {
2243 const SCEV *Reg = *I;
2244 int64_t Imm = ExtractImmediate(Reg, SE);
2245 std::pair<RegMapTy::iterator, bool> Pair =
2246 Map.insert(std::make_pair(Reg, ImmMapTy()));
2247 if (Pair.second)
2248 Sequence.push_back(Reg);
2249 Pair.first->second.insert(std::make_pair(Imm, *I));
2250 }
2251
2252 // Insert an artificial expression at offset 0 (if there isn't one already),
2253 // as this may lead to more reuse opportunities.
2254 for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
2255 E = Sequence.end(); I != E; ++I)
2256 Map.find(*I)->second.insert(ImmMapTy::value_type(0, 0));
2257
2258 // Now examine each set of registers with the same base value. Build up
2259 // a list of work to do and do the work in a separate step so that we're
2260 // not adding formulae and register counts while we're searching.
2261 SmallVector<WorkItem, 32> WorkItems;
2262 for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
2263 E = Sequence.end(); I != E; ++I) {
2264 const SCEV *Reg = *I;
2265 const ImmMapTy &Imms = Map.find(Reg)->second;
2266 // Examine each offset.
2267 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
2268 J != JE; ++J) {
2269 const SCEV *OrigReg = J->second;
2270 // Skip the artificial register at offset 0.
2271 if (!OrigReg) continue;
2272
2273 int64_t JImm = J->first;
2274 const SmallBitVector &Bits = RegUses.find(OrigReg)->second.Bits;
2275
2276 // Examine each other offset associated with the same register. This is
2277 // quadratic in the number of registers with the same base, but it's
2278 // uncommon for this to be a large number.
2279 for (ImmMapTy::const_iterator M = Imms.begin(); M != JE; ++M) {
2280 if (M == J) continue;
2281
2282 // Compute the difference between the two.
2283 int64_t Imm = (uint64_t)JImm - M->first;
2284 for (int LUIdx = Bits.find_first(); LUIdx != -1;
2285 LUIdx = Bits.find_next(LUIdx))
2286 // Make a memo of this use, offset, and register tuple.
2287 WorkItems.push_back(WorkItem(&Uses[LUIdx], LUIdx, Imm, OrigReg));
Evan Cheng586f69a2009-11-12 07:35:05 +00002288 }
2289 }
2290 }
2291
Dan Gohmana10756e2010-01-21 02:09:26 +00002292 // Now iterate through the worklist and add new formulae.
2293 for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
2294 E = WorkItems.end(); I != E; ++I) {
2295 const WorkItem &WI = *I;
2296 LSRUse &LU = *WI.LU;
2297 size_t LUIdx = WI.LUIdx;
2298 int64_t Imm = WI.Imm;
2299 const SCEV *OrigReg = WI.OrigReg;
2300
2301 const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
2302 const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy,
2303 -(uint64_t)Imm));
2304
2305 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
2306 Formula F = LU.Formulae[L];
2307 // Use the immediate in the scaled register.
2308 if (F.ScaledReg == OrigReg) {
2309 int64_t Offs = (uint64_t)F.AM.BaseOffs +
2310 Imm * (uint64_t)F.AM.Scale;
2311 // Don't create 50 + reg(-50).
2312 if (F.referencesReg(SE.getSCEV(
2313 ConstantInt::get(IntTy, -(uint64_t)Offs))))
2314 continue;
2315 Formula NewF = F;
2316 NewF.AM.BaseOffs = Offs;
2317 if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
2318 continue;
2319 const SCEV *Diff = SE.getAddExpr(NegImmS, NewF.ScaledReg);
2320 if (Diff->isZero()) continue;
2321 NewF.ScaledReg = Diff;
Dan Gohman8b0ade32010-01-21 22:42:49 +00002322 (void)InsertFormula(LU, LUIdx, NewF);
Dan Gohmana10756e2010-01-21 02:09:26 +00002323 }
2324 // Use the immediate in a base register.
2325 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
2326 const SCEV *BaseReg = F.BaseRegs[N];
2327 if (BaseReg != OrigReg)
2328 continue;
2329 Formula NewF = F;
2330 NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
2331 if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
2332 continue;
2333 const SCEV *Diff = SE.getAddExpr(NegImmS, BaseReg);
2334 if (Diff->isZero()) continue;
2335 // Don't create 50 + reg(-50).
2336 if (Diff ==
2337 SE.getSCEV(ConstantInt::get(IntTy,
2338 -(uint64_t)NewF.AM.BaseOffs)))
2339 continue;
2340 NewF.BaseRegs[N] = Diff;
Dan Gohman8b0ade32010-01-21 22:42:49 +00002341 (void)InsertFormula(LU, LUIdx, NewF);
Dan Gohmana10756e2010-01-21 02:09:26 +00002342 }
2343 }
2344 }
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002345}
2346
Dan Gohmana10756e2010-01-21 02:09:26 +00002347/// GenerateAllReuseFormulae - Generate formulae for each use.
2348void
2349LSRInstance::GenerateAllReuseFormulae() {
2350 SmallVector<Formula, 12> Save;
2351 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2352 LSRUse &LU = Uses[LUIdx];
2353
2354 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2355 GenerateSymbolicOffsetReuse(LU, LUIdx, LU.Formulae[i]);
2356 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2357 GenerateICmpZeroScaledReuse(LU, LUIdx, LU.Formulae[i]);
2358 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2359 GenerateReassociationReuse(LU, LUIdx, LU.Formulae[i]);
2360 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2361 GenerateCombinationReuse(LU, LUIdx, LU.Formulae[i]);
2362 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2363 GenerateScaledReuse(LU, LUIdx, LU.Formulae[i]);
2364 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2365 GenerateTruncateReuse(LU, LUIdx, LU.Formulae[i]);
2366 }
2367
2368 GenerateConstantOffsetReuse();
2369}
2370
2371/// GenerateLoopInvariantRegisterUses - Check for other uses of loop-invariant
2372/// values which we're tracking. These other uses will pin these values in
2373/// registers, making them less profitable for elimination.
2374/// TODO: This currently misses non-constant addrec step registers.
2375/// TODO: Should this give more weight to users inside the loop?
2376void
2377LSRInstance::GenerateLoopInvariantRegisterUses() {
Dan Gohman8b0ade32010-01-21 22:42:49 +00002378 SmallVector<const SCEV *, 8> Worklist(RegSequence.begin(), RegSequence.end());
2379
2380 while (!Worklist.empty()) {
2381 const SCEV *S = Worklist.pop_back_val();
2382
2383 if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
2384 Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
2385 else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
2386 Worklist.push_back(C->getOperand());
2387 else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2388 Worklist.push_back(D->getLHS());
2389 Worklist.push_back(D->getRHS());
2390 } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
Dan Gohmana10756e2010-01-21 02:09:26 +00002391 const Value *V = U->getValue();
2392 if (const Instruction *Inst = dyn_cast<Instruction>(V))
2393 if (L->contains(Inst)) continue;
2394 for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
2395 UI != UE; ++UI) {
2396 const Instruction *UserInst = dyn_cast<Instruction>(*UI);
2397 // Ignore non-instructions.
2398 if (!UserInst)
2399 continue;
2400 // Ignore instructions in other functions (as can happen with
2401 // Constants).
2402 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
2403 continue;
2404 // Ignore instructions not dominated by the loop.
2405 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
2406 UserInst->getParent() :
2407 cast<PHINode>(UserInst)->getIncomingBlock(
2408 PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
2409 if (!DT.dominates(L->getHeader(), UseBB))
2410 continue;
2411 // Ignore uses which are part of other SCEV expressions, to avoid
2412 // analyzing them multiple times.
2413 if (SE.isSCEVable(UserInst->getType()) &&
2414 !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
2415 continue;
2416 // Ignore icmp instructions which are already being analyzed.
2417 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
2418 unsigned OtherIdx = !UI.getOperandNo();
2419 Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
2420 if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
2421 continue;
2422 }
2423
2424 LSRUse &LU = getNewUse();
2425 LU.UserInst = const_cast<Instruction *>(UserInst);
2426 LU.OperandValToReplace = UI.getUse();
2427 LU.InsertSupplementalFormula(U);
2428 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
2429 }
2430 }
Dan Gohman8b0ade32010-01-21 22:42:49 +00002431 }
Dan Gohmana10756e2010-01-21 02:09:26 +00002432}
2433
2434#ifndef NDEBUG
2435
2436static void debug_winner(SmallVector<LSRUse, 16> const &Uses) {
2437 dbgs() << "LSR has selected formulae for each use:\n";
2438 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
2439 E = Uses.end(); I != E; ++I) {
2440 const LSRUse &LU = *I;
2441 dbgs() << " ";
2442 LU.print(dbgs());
2443 dbgs() << '\n';
2444 dbgs() << " ";
2445 LU.Formulae.front().print(dbgs());
2446 dbgs() << "\n";
2447 }
2448}
2449
2450#endif
2451
2452LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
2453 : IU(P->getAnalysis<IVUsers>()),
2454 SE(P->getAnalysis<ScalarEvolution>()),
2455 DT(P->getAnalysis<DominatorTree>()),
Dan Gohman8b0ade32010-01-21 22:42:49 +00002456 TLI(tli), L(l), Changed(false), CurrentArbitraryRegIndex(0), MaxNumRegs(0) {
Devang Patel0f54dcb2007-03-06 21:14:09 +00002457
Dan Gohman03e896b2009-11-05 21:11:53 +00002458 // If LoopSimplify form is not available, stay out of trouble.
Dan Gohmana10756e2010-01-21 02:09:26 +00002459 if (!L->isLoopSimplifyForm()) return;
Dan Gohman03e896b2009-11-05 21:11:53 +00002460
Dan Gohmana10756e2010-01-21 02:09:26 +00002461 // If there's no interesting work to be done, bail early.
2462 if (IU.IVUsesByStride.empty()) return;
Dan Gohman80b0f8c2009-03-09 20:34:59 +00002463
Dan Gohmana10756e2010-01-21 02:09:26 +00002464 DEBUG(dbgs() << "\nLSR on loop ";
2465 WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
2466 dbgs() << ":\n");
Dan Gohmanf7912df2009-03-09 20:46:50 +00002467
Dan Gohmana10756e2010-01-21 02:09:26 +00002468 // Sort the StrideOrder so we process larger strides first.
2469 std::stable_sort(IU.StrideOrder.begin(), IU.StrideOrder.end(),
2470 StrideCompare(SE));
Chris Lattner010de252005-08-08 05:28:22 +00002471
Dan Gohmana10756e2010-01-21 02:09:26 +00002472 // If the IV is used in an int-to-float cast inside the loop, try to
2473 // eliminate the cast operation.
2474 OptimizeShadowIV();
Evan Cheng5792f512009-05-11 22:33:01 +00002475
Dan Gohmana10756e2010-01-21 02:09:26 +00002476 // Change loop terminating condition to use the postinc iv when possible.
2477 Instruction *IVIncInsertPos;
2478 Changed |= OptimizeLoopTermCond(IVIncInsertPos);
Chris Lattner010de252005-08-08 05:28:22 +00002479
Dan Gohmana10756e2010-01-21 02:09:26 +00002480 for (SmallVectorImpl<const SCEV *>::const_iterator SIter =
2481 IU.StrideOrder.begin(), SEnd = IU.StrideOrder.end();
2482 SIter != SEnd; ++SIter) {
2483 const SCEV *Stride = *SIter;
Misha Brukmanfd939082005-04-21 23:48:37 +00002484
Dan Gohmana10756e2010-01-21 02:09:26 +00002485 // Collect interesting types.
2486 Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
Evan Chengd1d6b5c2006-03-16 21:53:05 +00002487
Dan Gohmana10756e2010-01-21 02:09:26 +00002488 // Collect interesting factors.
2489 for (SmallVectorImpl<const SCEV *>::const_iterator NewStrideIter =
2490 SIter + 1; NewStrideIter != SEnd; ++NewStrideIter) {
2491 const SCEV *OldStride = Stride;
2492 const SCEV *NewStride = *NewStrideIter;
Nate Begemaneaa13852004-10-18 21:08:22 +00002493
Dan Gohmana10756e2010-01-21 02:09:26 +00002494 if (SE.getTypeSizeInBits(OldStride->getType()) !=
2495 SE.getTypeSizeInBits(NewStride->getType())) {
2496 if (SE.getTypeSizeInBits(OldStride->getType()) >
2497 SE.getTypeSizeInBits(NewStride->getType()))
2498 NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2499 else
2500 OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2501 }
2502 if (const SCEVConstant *Factor =
2503 dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
2504 SE, true)))
2505 if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
2506 Factors.insert(Factor->getValue()->getValue().getSExtValue());
2507 }
Dan Gohman6bec5bb2009-12-18 00:06:20 +00002508
Dan Gohmana10756e2010-01-21 02:09:26 +00002509 std::map<const SCEV *, IVUsersOfOneStride *>::const_iterator SI =
2510 IU.IVUsesByStride.find(Stride);
2511 assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
2512 for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
2513 E = SI->second->Users.end(); UI != E; ++UI) {
2514 // Record the uses.
2515 LSRUse &LU = getNewUse();
2516 LU.UserInst = UI->getUser();
2517 LU.OperandValToReplace = UI->getOperandValToReplace();
2518 if (isAddressUse(LU.UserInst, LU.OperandValToReplace)) {
2519 LU.Kind = LSRUse::Address;
2520 LU.AccessTy = getAccessType(LU.UserInst);
2521 }
2522 if (UI->isUseOfPostIncrementedValue())
2523 LU.PostIncLoop = L;
Dan Gohman6bec5bb2009-12-18 00:06:20 +00002524
Dan Gohmana10756e2010-01-21 02:09:26 +00002525 const SCEV *S = IU.getCanonicalExpr(*UI);
2526
2527 // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
2528 // (N - i == 0), and this allows (N - i) to be the expression that we
2529 // work with rather than just N or i, so we can consider the register
2530 // requirements for both N and i at the same time. Limiting this code
2531 // to equality icmps is not a problem because all interesting loops
2532 // use equality icmps, thanks to IndVarSimplify.
2533 if (ICmpInst *CI = dyn_cast<ICmpInst>(LU.UserInst))
2534 if (CI->isEquality()) {
2535 // Swap the operands if needed to put the OperandValToReplace on
2536 // the left, for consistency.
2537 Value *NV = CI->getOperand(1);
2538 if (NV == LU.OperandValToReplace) {
2539 CI->setOperand(1, CI->getOperand(0));
2540 CI->setOperand(0, NV);
2541 }
2542
2543 // x == y --> x - y == 0
2544 const SCEV *N = SE.getSCEV(NV);
2545 if (N->isLoopInvariant(L)) {
2546 LU.Kind = LSRUse::ICmpZero;
2547 S = SE.getMinusSCEV(N, S);
2548 }
2549
2550 // -1 and the negations of all interesting strides (except the
2551 // negation of -1) are now also interesting.
2552 for (size_t i = 0, e = Factors.size(); i != e; ++i)
2553 if (Factors[i] != -1)
2554 Factors.insert(-(uint64_t)Factors[i]);
2555 Factors.insert(-1);
2556 }
2557
2558 // Ok, now enumerate all the different formulae we can find to compute
2559 // the value for this expression.
2560 LU.InsertInitialFormula(S, L, SE, DT);
2561 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
2562 }
Evan Cheng81ebdcf2009-11-10 21:14:05 +00002563 }
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002564
Dan Gohmana10756e2010-01-21 02:09:26 +00002565 // If all uses use the same type, don't bother looking for truncation-based
2566 // reuse.
2567 if (Types.size() == 1)
2568 Types.clear();
2569
Dan Gohmana10756e2010-01-21 02:09:26 +00002570 // If there are any uses of registers that we're tracking that have escaped
2571 // IVUsers' attention, add trivial uses for them, so that the register
2572 // voting process takes them into consideration.
2573 GenerateLoopInvariantRegisterUses();
2574
Dan Gohman8b0ade32010-01-21 22:42:49 +00002575 // Start by assuming we'll assign each use its own register. This is
2576 // sometimes called "full" strength reduction, or "superhero" mode.
2577 // Sometimes this is the best solution, but if there are opportunities for
2578 // reuse we may find a better solution.
2579 Score CurScore;
2580 CurScore.RateInitial(Uses, L, SE);
2581
2582 MaxNumRegs = CurScore.getNumRegs();
2583
2584 // Now use the reuse data to generate a bunch of interesting ways
2585 // to formulate the values needed for the uses.
2586 GenerateAllReuseFormulae();
2587
Dan Gohmana10756e2010-01-21 02:09:26 +00002588 // Sort the formulae. TODO: This is redundantly sorted below.
2589 for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), E = Uses.end();
2590 I != E; ++I) {
2591 LSRUse &LU = *I;
2592 std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
2593 ComplexitySorter());
2594 }
2595
2596 // Ok, we've now collected all the uses and noted their register uses. The
2597 // next step is to start looking at register reuse possibilities.
2598 DEBUG(print(dbgs()); dbgs() << '\n');
2599
Dan Gohmana10756e2010-01-21 02:09:26 +00002600 // Create a sorted list of registers with those with the most uses appearing
2601 // earlier in the list. We'll visit them first, as they're the most likely
2602 // to represent profitable reuse opportunities.
2603 SmallVector<RegCount, 8> RegOrder;
2604 for (SmallVectorImpl<const SCEV *>::const_iterator I =
2605 RegSequence.begin(), E = RegSequence.end(); I != E; ++I)
2606 RegOrder.push_back(RegCount(*I, RegUses.find(*I)->second));
2607 std::stable_sort(RegOrder.begin(), RegOrder.end());
2608
2609 // Visit each register. Determine which ones represent profitable reuse
2610 // opportunities and remember them.
2611 // TODO: Extract this code into a function.
2612 for (SmallVectorImpl<RegCount>::const_iterator I = RegOrder.begin(),
2613 E = RegOrder.end(); I != E; ++I) {
2614 const SCEV *Reg = I->Reg;
2615 const SmallBitVector &Bits = I->Sort.Bits;
2616
2617 // Registers with only one use don't represent reuse opportunities, so
2618 // when we get there, we're done.
2619 if (Bits.count() <= 1) break;
2620
2621 DEBUG(dbgs() << "Reg " << *Reg << ": ";
2622 I->Sort.print(dbgs());
2623 dbgs() << '\n');
2624
2625 // Determine the total number of registers that will be needed if we make
2626 // use of the reuse opportunity represented by the current register.
2627 Score NewScore;
2628 NewScore.Rate(Reg, Bits, Uses, L, SE);
2629
2630 // Now decide whether this register's reuse opportunity is an overall win.
2631 // Currently the decision is heavily skewed toward reducing register pressure.
2632 if (!(NewScore < CurScore)) {
2633 continue;
2634 }
2635
2636 // Ok, use this opportunity.
2637 DEBUG(dbgs() << "This candidate has been accepted.\n");
2638 CurScore = NewScore;
2639
2640 // Now that we've selected a new reuse opportunity, delete formulae that
2641 // do not participate in that opportunity.
2642 for (int j = Bits.find_first(); j != -1; j = Bits.find_next(j)) {
2643 LSRUse &LU = Uses[j];
2644 for (unsigned k = 0, h = LU.Formulae.size(); k != h; ++k) {
2645 Formula &F = LU.Formulae[k];
2646 if (!F.referencesReg(Reg)) {
2647 std::swap(LU.Formulae[k], LU.Formulae.back());
2648 LU.Formulae.pop_back();
2649 --k; --h;
2650 }
2651 }
2652 // Also re-sort the list to put the formulae with the fewest registers
2653 // at the front.
2654 // TODO: Do this earlier, we don't need it each time.
2655 std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
2656 ComplexitySorter());
2657 }
2658 }
2659
2660 // Ok, we've now made all our decisions. The first formula for each use
2661 // will be used.
2662 DEBUG(dbgs() << "Concluding, we need "; CurScore.print(dbgs());
2663 dbgs() << ".\n";
2664 debug_winner(Uses));
2665
2666 // Free memory no longer needed.
2667 RegOrder.clear();
2668 Factors.clear();
2669 Types.clear();
2670 RegUses.clear();
2671 RegSequence.clear();
2672
2673 // Keep track of instructions we may have made dead, so that
2674 // we can remove them after we are done working.
2675 SmallVector<WeakVH, 16> DeadInsts;
2676
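  // Set up an expander that won't rewrite expressions back into canonical
  // IV form, and that emits IV increments at the position chosen above.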
2677 SCEVExpander Rewriter(SE);
2678 Rewriter.disableCanonicalMode();
2679 Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
2680
2681 // Expand the new value definitions and update the users.
2682 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
2683 E = Uses.end(); I != E; ++I) {
2684 // Formulae should be legal.
2685 DEBUG(for (SmallVectorImpl<Formula>::const_iterator J = I->Formulae.begin(),
2686 JE = I->Formulae.end(); J != JE; ++J)
2687 assert(isLegalUse(J->AM, I->Kind, I->AccessTy, TLI) &&
2688 "Illegal formula generated!"));
2689
2690 // Expand the new code and update the user.
2691 I->Rewrite(L, Rewriter, DeadInsts, SE, DT, P);
2692 Changed = true;
2693 }
2694
2695 // Clean up after ourselves. This must be done before deleting any
2696 // instructions.
2697 Rewriter.clear();
2698
2699 Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
2700}
2701
2702void LSRInstance::print(raw_ostream &OS) const {
Dan Gohman8b0ade32010-01-21 22:42:49 +00002703 if (MaxNumRegs != 0)
2704 OS << "LSR is considering " << MaxNumRegs << " to be the maximum "
2705 "number of registers needed.\n";
2706
Dan Gohmana10756e2010-01-21 02:09:26 +00002707 OS << "LSR has identified the following interesting factors and types: ";
2708 bool First = true;
2709
2710 for (SmallSetVector<int64_t, 4>::const_iterator
2711 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
2712 if (!First) OS << ", ";
2713 First = false;
2714 OS << '*' << *I;
2715 }
2716
2717 for (SmallSetVector<const Type *, 4>::const_iterator
2718 I = Types.begin(), E = Types.end(); I != E; ++I) {
2719 if (!First) OS << ", ";
2720 First = false;
2721 OS << '(' << **I << ')';
2722 }
2723 OS << '\n';
2724
2725 OS << "LSR is examining the following uses, and candidate formulae:\n";
2726 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
2727 E = Uses.end(); I != E; ++I) {
2728 const LSRUse &LU = *I;
2729 OS << " ";
2730 LU.print(OS);
2731 OS << '\n';
2732 for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
2733 JE = LU.Formulae.end(); J != JE; ++J) {
2734 OS << " ";
2735 J->print(OS);
2736 OS << "\n";
2737 }
2738 }
2739}
2740
2741void LSRInstance::dump() const {
2742 print(errs()); errs() << '\n';
2743}
2744
2745namespace {
2746
2747class LoopStrengthReduce : public LoopPass {
2748 /// TLI - Keep a pointer to a TargetLowering to consult for determining
2749 /// transformation profitability.
2750 const TargetLowering *const TLI;
2751
2752public:
2753 static char ID; // Pass ID, replacement for typeid
2754 explicit LoopStrengthReduce(const TargetLowering *tli = NULL);
2755
2756private:
2757 bool runOnLoop(Loop *L, LPPassManager &LPM);
2758 void getAnalysisUsage(AnalysisUsage &AU) const;
2759};
2760
2761}
2762
2763char LoopStrengthReduce::ID = 0;
2764static RegisterPass<LoopStrengthReduce>
2765X("loop-reduce", "Loop Strength Reduction");
2766
2767Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
2768 return new LoopStrengthReduce(TLI);
2769}
2770
2771LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
2772 : LoopPass(&ID), TLI(tli) {}
2773
2774void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
2775 // We split critical edges, so we change the CFG. However, we do update
2776 // many analyses if they are around.
2777 AU.addPreservedID(LoopSimplifyID);
2778 AU.addPreserved<LoopInfo>();
2779 AU.addPreserved("domfrontier");
2780
2781 AU.addRequiredID(LoopSimplifyID);
2782 AU.addRequired<DominatorTree>();
2783 AU.addPreserved<DominatorTree>();
2784 AU.addRequired<ScalarEvolution>();
2785 AU.addPreserved<ScalarEvolution>();
2786 AU.addRequired<IVUsers>();
2787 AU.addPreserved<IVUsers>();
2788}
2789
2790bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
2791 bool Changed = false;
2792
2793 // Run the main LSR transformation.
2794 Changed |= LSRInstance(TLI, L, this).getChanged();
2795
Dan Gohmanafc36a92009-05-02 18:29:22 +00002796 // At this point, it is worth checking to see if any recurrence PHIs are also
Dan Gohman35738ac2009-05-04 22:30:44 +00002797 // dead, so that we can remove them as well.
Dan Gohman9fff2182010-01-05 16:31:45 +00002798 Changed |= DeleteDeadPHIs(L->getHeader());
Dan Gohmanafc36a92009-05-02 18:29:22 +00002799
Evan Cheng1ce75dc2008-07-07 19:51:32 +00002800 return Changed;
Nate Begemaneaa13852004-10-18 21:08:22 +00002801}