//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable; it
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
29// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30// it's useful to think about these as the same register, with some uses using
31// the value of the register before the add and some using // it after. In this
32// example, the icmp is a post-increment user, since it uses %i.next, which is
33// the value of the induction variable after the increment. The other common
34// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated.
//
// TODO: Handle multiple loops at a time.
//
// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is
//       that {0,+,1}<%bb> is getting picked first because all 7 uses can
//       use it, and while it's a pretty good solution, it means that LSR
//       doesn't look further to find an even better solution.
//
// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
//       instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
//       smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
//       multiple base registers, or as the increment expression in an addrec),
//       we may not actually need both reg and (-1 * reg) in registers; the
//       negation can be implemented by using a sub instead of an add. The
//       lack of support for taking this into consideration when making
//       register pressure decisions is partly worked around by the "Special"
//       use kind.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

namespace {

// Constant strides come first, and are in turn sorted by their absolute
// values. If absolute values are the same, then positive strides come first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
  const ScalarEvolution &SE;
  explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}

  bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
    const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
    const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
    if (LHSC && RHSC) {
      unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
                                   SE.getTypeSizeInBits(RHS->getType()));
      APInt LV = LHSC->getValue()->getValue();
      APInt RV = RHSC->getValue()->getValue();
      LV.sextOrTrunc(BitWidth);
      RV.sextOrTrunc(BitWidth);
      APInt ALV = LV.abs();
      APInt ARV = RV.abs();
      if (ALV == ARV) {
        if (LV != RV)
          return LV.sgt(RV);
      } else {
        return ALV.ult(ARV);
      }

      // If it's the same value but different type, sort by bit width so
      // that we emit larger induction variables before smaller
      // ones, letting the smaller be re-written in terms of larger ones.
      return SE.getTypeSizeInBits(RHS->getType()) <
             SE.getTypeSizeInBits(LHS->getType());
    }
    return LHSC && !RHSC;
  }
};

/// RegSortData - This class holds data which is used to order reuse
/// candidates.
class RegSortData {
public:
  /// Bits - This represents the set of LSRUses (by index) which reference a
  /// particular register.
  SmallBitVector Bits;

  /// MaxComplexity - This represents the greatest complexity (see the comments
  /// on Formula::getComplexity) seen with a particular register.
  uint32_t MaxComplexity;

  /// Index - This holds an arbitrary value used as a last-resort tie breaker
  /// to ensure deterministic behavior.
  unsigned Index;

  RegSortData() {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << Bits.count()
     << ", MaxComplexity=";
  OS.write_hex(MaxComplexity);
  OS << ", Index=" << Index << ']';
}

void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// RegCount - This is a helper class to sort a given set of registers
/// according to associated RegSortData values.
class RegCount {
public:
  const SCEV *Reg;
  RegSortData Sort;

  RegCount(const SCEV *R, const RegSortData &RSD)
    : Reg(R), Sort(RSD) {}

  // Sort by count. Returning true means the register is preferred.
  bool operator<(const RegCount &Other) const {
    // Sort by the number of unique uses of this register.
    unsigned A = Sort.Bits.count();
    unsigned B = Other.Sort.Bits.count();
    if (A != B) return A > B;

    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
      // AddRecs have higher priority than other things.
      if (!BR) return true;

      // Prefer affine values.
      if (AR->isAffine() != BR->isAffine())
        return AR->isAffine();

      const Loop *AL = AR->getLoop();
      const Loop *BL = BR->getLoop();
      if (AL != BL) {
        unsigned ADepth = AL->getLoopDepth();
        unsigned BDepth = BL->getLoopDepth();
        // Prefer a less deeply nested addrec.
        if (ADepth != BDepth)
          return ADepth < BDepth;

        // Different loops at the same depth; do something arbitrary.
        BasicBlock *AH = AL->getHeader();
        BasicBlock *BH = BL->getHeader();
        for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
          if (&*I == BH) return true;
        return false;
      }

      // Sort addrecs by stride.
      const SCEV *AStep = AR->getOperand(1);
      const SCEV *BStep = BR->getOperand(1);
      if (AStep != BStep) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep);
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Ignore the sign bit, assuming that striding by a negative value
          // is just as easy as by a positive value.
          // Prefer the addrec with the lesser absolute value stride, as it
          // will allow uses to have simpler addressing modes.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
      }

      // Then sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;

      // Then sort them by their start values.
      const SCEV *AStart = AR->getStart();
      const SCEV *BStart = BR->getStart();
      if (AStart != BStart) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart);
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Prefer positive over negative if the absolute values are the same.
          if (AC->getValue()->getValue().abs() ==
              BC->getValue()->getValue().abs())
            return AC->getValue()->getValue().isStrictlyPositive();
          // Prefer the addrec with the lesser absolute value start.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
      }
    } else {
      // AddRecs have higher priority than other things.
      if (isa<SCEVAddRecExpr>(Other.Reg)) return false;
      // Sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;
    }


    // Tie-breaker: the arbitrary index, to ensure a reliable ordering.
    return Sort.Index < Other.Sort.Index;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

void RegCount::print(raw_ostream &OS) const {
  OS << *Reg << ':';
  Sort.print(OS);
}

void RegCount::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// Formula - This class holds information that describes a formula for
/// satisfying a use. It may include broken-out immediates and scaled registers.
struct Formula {
  /// AM - This is used to represent complex addressing, as well as other kinds
  /// of interesting uses.
  TargetLowering::AddrMode AM;

  /// BaseRegs - The list of "base" registers for this use. When this is
  /// non-empty, AM.HasBaseReg should be set to true.
  SmallVector<const SCEV *, 2> BaseRegs;

  /// ScaledReg - The 'scaled' register for this use. This should be non-null
  /// when AM.Scale is not zero.
  const SCEV *ScaledReg;

  Formula() : ScaledReg(0) {}

  unsigned getNumRegs() const;
  uint32_t getComplexity() const;
  const Type *getType() const;

  void InitialMatch(const SCEV *S, Loop *L,
                    ScalarEvolution &SE, DominatorTree &DT);

  /// referencesReg - Test if this formula references the given register.
  bool referencesReg(const SCEV *S) const {
    return S == ScaledReg ||
           std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
  }

  bool operator==(const Formula &Other) const {
    return BaseRegs == Other.BaseRegs &&
           ScaledReg == Other.ScaledReg &&
           AM.Scale == Other.AM.Scale &&
           AM.BaseOffs == Other.AM.BaseOffs &&
           AM.BaseGV == Other.AM.BaseGV;
  }

  // This sorts at least partially based on host pointer values which are
  // not deterministic, so it is only usable for uniquification.
  bool operator<(const Formula &Other) const {
    if (BaseRegs != Other.BaseRegs)
      return BaseRegs < Other.BaseRegs;
    if (ScaledReg != Other.ScaledReg)
      return ScaledReg < Other.ScaledReg;
    if (AM.Scale != Other.AM.Scale)
      return AM.Scale < Other.AM.Scale;
    if (AM.BaseOffs != Other.AM.BaseOffs)
      return AM.BaseOffs < Other.AM.BaseOffs;
    if (AM.BaseGV != Other.AM.BaseGV)
      return AM.BaseGV < Other.AM.BaseGV;
    return false;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
unsigned Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// getComplexity - Return an oversimplified value indicating the complexity
/// of this formula. This is used as a tie-breaker in choosing register
/// preferences.
uint32_t Formula::getComplexity() const {
  // Encode the information in a uint32_t so that comparing with operator<
  // will be interesting.
  return
    // Most significant, the number of registers. This saturates because we
    // need the bits, and because beyond a few hundred it doesn't really matter.
    (std::min(getNumRegs(), (1u<<15)-1) << 17) |
    // Having multiple base regs is worse than having a base reg and a scale.
    ((BaseRegs.size() > 1) << 16) |
    // Scale absolute value.
    ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) |
    // Scale sign, which is less significant than the absolute value.
    ((AM.Scale < 0) << 8) |
    // Offset absolute value.
    ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) |
    // If a GV is present, treat it like a maximal offset.
    ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) |
    // Offset sign, which is less significant than the absolute offset.
    ((AM.BaseOffs < 0) << 0);
}
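
// Worked example of the encoding above (illustrative only, not from the
// original comments): a formula with one base register, a scaled register
// with Scale == 2, and no immediate has getNumRegs() == 2, so it encodes as
// (2 << 17) | ((Log2_64(2) + 1) << 9) == 0x40400. Adding a second base
// register would both raise the register count and set bit 16, producing a
// strictly greater (i.e. worse) value under operator<.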

/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
const Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         AM.BaseGV ? AM.BaseGV->getType() :
         0;
}

namespace {

/// ComplexitySorter - A predicate which orders Formulae by the number of
/// registers they contain.
struct ComplexitySorter {
  bool operator()(const Formula &LHS, const Formula &RHS) const {
    unsigned L = LHS.getNumRegs();
    unsigned R = RHS.getNumRegs();
    if (L != R) return L < R;

    return LHS.getComplexity() < RHS.getComplexity();
  }
};

}

/// DoInitialMatch - Recursion helper for InitialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE, DominatorTree &DT) {
  // Collect expressions which properly dominate the loop header.
  if (S->properlyDominates(L->getHeader(), &DT)) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I)
      DoInitialMatch(*I, L, Good, Bad, SE, DT);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (!AR->getStart()->isZero()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
      DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
                                      AR->getStepRecurrence(SE),
                                      AR->getLoop()),
                     L, Good, Bad, SE, DT);
      return;
    }
  }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
           E = MyGood.end(); I != E; ++I)
        Good.push_back(SE.getMulExpr(NegOne, *I));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
           E = MyBad.end(); I != E; ++I)
        Bad.push_back(SE.getMulExpr(NegOne, *I));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}
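
// Illustrative sketch of the recursion (%a and %L are hypothetical): given
// S == {%a,+,4}<%L> where %a is defined above the loop and so properly
// dominates the loop header, the nonzero start is peeled off, %a lands in
// Good, and the remaining loop-variant {0,+,4}<%L> lands in Bad, to be kept
// in a base register of its own.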

/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
void Formula::InitialMatch(const SCEV *S, Loop *L,
                           ScalarEvolution &SE, DominatorTree &DT) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE, DT);
  if (!Good.empty()) {
    BaseRegs.push_back(SE.getAddExpr(Good));
    AM.HasBaseReg = true;
  }
  if (!Bad.empty()) {
    BaseRegs.push_back(SE.getAddExpr(Bad));
    AM.HasBaseReg = true;
  }
}

void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (AM.BaseGV) {
    if (!First) OS << " + "; else First = false;
    WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
  }
  if (AM.BaseOffs != 0) {
    if (!First) OS << " + "; else First = false;
    OS << AM.BaseOffs;
  }
  for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
       E = BaseRegs.end(); I != E; ++I) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(";
    OS << **I;
    OS << ")";
  }
  if (AM.Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << AM.Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ")";
  }
}

void Formula::dump() const {
  print(errs()); errs() << '\n';
}

/// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
/// or null otherwise. If IgnoreSignificantBits is true, expressions like
/// (X * Y) /s Y are simplified to X, ignoring that the multiplication may
/// overflow, which is useful when the result will be used in a context where
/// the most significant bits are ignored.
static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
                           ScalarEvolution &SE,
                           bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getIntegerSCEV(1, LHS->getType());

  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
  // folding.
  if (RHS->isAllOnesValue())
    return SE.getMulExpr(LHS, RHS);

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
    if (!RC)
      return 0;
    if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
      return 0;
    return SE.getConstant(C->getValue()->getValue()
               .sdiv(RC->getValue()->getValue()));
  }

  // Distribute the sdiv over addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
                                IgnoreSignificantBits);
    if (!Start) return 0;
    const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
                               IgnoreSignificantBits);
    if (!Step) return 0;
    return SE.getAddRecExpr(Start, Step, AR->getLoop());
  }

  // Distribute the sdiv over add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    SmallVector<const SCEV *, 8> Ops;
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I) {
      const SCEV *Op = getSDiv(*I, RHS, SE,
                               IgnoreSignificantBits);
      if (!Op) return 0;
      Ops.push_back(Op);
    }
    return SE.getAddExpr(Ops);
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
    if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
           I != E; ++I) {
        if (!Found)
          if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) {
            Ops.push_back(Q);
            Found = true;
            continue;
          }
        Ops.push_back(*I);
      }
      return Found ? SE.getMulExpr(Ops) : 0;
    }

  // Otherwise we don't know.
  return 0;
}
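
// Illustrative examples (not from the original comments; %x and %L are
// hypothetical): {8,+,4}<%L> /s 2 distributes over the addrec operands to
// give {4,+,2}<%L>, and (6 * %x) /s 3 gives (2 * %x) when the multiply has
// no signed wrap or IgnoreSignificantBits is set.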

namespace {

/// LSRUse - This class holds the state that LSR keeps for each use in
/// IVUsers, as well as uses invented by LSR itself. It includes information
/// about what kinds of things can be folded into the user, information
/// about the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
  SmallSet<Formula, 8> FormulaeUniquifier;

public:
  /// KindType - An enum for a kind of use, indicating what types of
  /// scaled and immediate operands it might support.
  enum KindType {
    Basic,   ///< A normal use, with no folding.
    Special, ///< A special case of basic, allowing -1 scales.
    Address, ///< An address use; folding according to TargetLowering
    ICmpZero ///< An equality icmp with both operands folded into one.
    // TODO: Add a generic icmp too?
  };

  KindType Kind;
  const Type *AccessTy;
  Instruction *UserInst;
  Value *OperandValToReplace;

  /// PostIncLoop - If this user is to use the post-incremented value of an
  /// induction variable, this variable is non-null and holds the loop
  /// associated with the induction variable.
  const Loop *PostIncLoop;

  /// Formulae - A list of ways to build a value that can satisfy this user.
  /// After the list is populated, one of these is selected heuristically and
  /// used to formulate a replacement for OperandValToReplace in UserInst.
  SmallVector<Formula, 12> Formulae;

  LSRUse() : Kind(Basic), AccessTy(0),
             UserInst(0), OperandValToReplace(0), PostIncLoop(0) {}

  void InsertInitialFormula(const SCEV *S, Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT);
  void InsertSupplementalFormula(const SCEV *S);

  bool InsertFormula(const Formula &F);

  void Rewrite(Loop *L, SCEVExpander &Rewriter,
               SmallVectorImpl<WeakVH> &DeadInsts,
               ScalarEvolution &SE, DominatorTree &DT,
               Pass *P) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  Value *Expand(BasicBlock::iterator IP,
                Loop *L, SCEVExpander &Rewriter,
                SmallVectorImpl<WeakVH> &DeadInsts,
                ScalarEvolution &SE, DominatorTree &DT) const;
};

}

/// ExtractImmediate - If S involves the addition of a constant integer value,
/// return that integer value, and mutate S to point to a new SCEV with that
/// value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    if (C->getValue()->getValue().getMinSignedBits() <= 64) {
      S = SE.getIntegerSCEV(0, C->getType());
      return C->getValue()->getSExtValue();
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    S = SE.getAddRecExpr(NewOps, AR->getLoop());
    return Result;
  }
  return 0;
}
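
// For example (illustrative; %x and %L are hypothetical): given
// S == (16 + %x), ExtractImmediate returns 16 and leaves S == %x; given
// S == {-4,+,8}<%L>, it returns -4 and leaves S == {0,+,8}<%L>.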

/// ExtractSymbol - If S involves the addition of a GlobalValue address,
/// return that symbol, and mutate S to point to a new SCEV with that
/// value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
      S = SE.getIntegerSCEV(0, GV->getType());
      return GV;
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
    S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
    S = SE.getAddRecExpr(NewOps, AR->getLoop());
    return Result;
  }
  return 0;
}

/// isLegalUse - Test whether the use described by AM is "legal", meaning
/// it can be completely folded into the user instruction at isel time.
/// This includes address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetLowering::AddrMode &AM,
                       LSRUse::KindType Kind, const Type *AccessTy,
                       const TargetLowering *TLI) {
  switch (Kind) {
  case LSRUse::Address:
    // If we have low-level target information, ask the target if it can
    // completely fold this address.
    if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);

    // Otherwise, just guess that reg+reg addressing is legal.
    return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;

  case LSRUse::ICmpZero:
    // There's not even a target hook for querying whether it would be legal
    // to fold a GV into an ICmp.
    if (AM.BaseGV)
      return false;

    // ICmp only has two operands; don't allow more than two non-trivial parts.
    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
      return false;

    // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale
    // by putting the scaled register in the other operand of the icmp.
    if (AM.Scale != 0 && AM.Scale != -1)
      return false;

    // If we have low-level target information, ask the target if it can
    // fold an integer immediate on an icmp.
    if (AM.BaseOffs != 0) {
      if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
      return false;
    }

    return true;

  case LSRUse::Basic:
    // Only handle single-register values.
    return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;

  case LSRUse::Special:
    // Only handle -1 scales, or no scale.
    return AM.Scale == 0 || AM.Scale == -1;
  }

  return false;
}
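
// For example (illustrative): with no TargetLowering available, an Address
// use with HasBaseReg set, Scale == 1, no BaseGV, and BaseOffs == 0 is
// accepted as plain reg+reg addressing, while the same AddrMode fails the
// Basic case, which tolerates only a single unscaled register.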

static bool isAlwaysFoldable(const SCEV *S,
                             bool HasBaseReg,
                             LSRUse::KindType Kind, const Type *AccessTy,
                             const TargetLowering *TLI,
                             ScalarEvolution &SE) {
  // Fast-path: zero is always foldable.
  if (S->isZero()) return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  TargetLowering::AddrMode AM;
  AM.BaseOffs = ExtractImmediate(S, SE);
  AM.BaseGV = ExtractSymbol(S, SE);
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  // If there's anything else involved, it's not foldable.
  if (!S->isZero()) return false;

  return isLegalUse(AM, Kind, AccessTy, TLI);
}

/// InsertFormula - If the given formula has not yet been inserted, add it
/// to the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
  Formula Copy = F;

  // Sort the base regs, to avoid adding the same solution twice with
  // the base regs in different orders. This uses host pointer values, but
  // it doesn't matter since it's only used for uniquifying.
  std::sort(Copy.BaseRegs.begin(), Copy.BaseRegs.end());

  DEBUG(for (SmallVectorImpl<const SCEV *>::const_iterator I =
        F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
          assert(!(*I)->isZero() && "Zero allocated in a base register!");
        assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
               "Zero allocated in a scaled register!"));

  if (FormulaeUniquifier.insert(Copy)) {
    Formulae.push_back(F);
    return true;
  }

  return false;
}

void
LSRUse::InsertInitialFormula(const SCEV *S, Loop *L,
                             ScalarEvolution &SE, DominatorTree &DT) {
  Formula F;
  F.InitialMatch(S, L, SE, DT);
  bool Inserted = InsertFormula(F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

void
LSRUse::InsertSupplementalFormula(const SCEV *S) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.AM.HasBaseReg = true;
  bool Inserted = InsertFormula(F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// getImmediateDominator - A handy utility for the specific DominatorTree
/// query that we need here.
///
static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
  DomTreeNode *Node = DT.getNode(BB);
  if (!Node) return 0;
  Node = Node->getIDom();
  if (!Node) return 0;
  return Node->getBlock();
}

Value *LSRUse::Expand(BasicBlock::iterator IP,
                      Loop *L, SCEVExpander &Rewriter,
                      SmallVectorImpl<WeakVH> &DeadInsts,
                      ScalarEvolution &SE, DominatorTree &DT) const {
  // Collect the instructions that the expansion point must remain dominated
  // by while expanding the replacement. These are the inputs whose values
  // will be required in the expansion, so they must dominate it.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(OperandValToReplace))
    Inputs.push_back(I);
  if (Kind == ICmpZero)
    if (Instruction *I =
          dyn_cast<Instruction>(cast<ICmpInst>(UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (PostIncLoop && !L->contains(UserInst))
    Inputs.push_back(L->getLoopLatch()->getTerminator());

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  for (;;) {
    bool AllDominate = true;
    Instruction *BetterPos = 0;
    BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
    if (!IDom) break;
    Instruction *Tentative = IDom->getTerminator();
    for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
         E = Inputs.end(); I != E; ++I) {
      Instruction *Inst = *I;
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      if (IDom == Inst->getParent() &&
          (!BetterPos || DT.dominates(BetterPos, Inst)))
        BetterPos = next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos;
    else
      IP = Tentative;
  }
  while (isa<PHINode>(IP)) ++IP;

  // The first formula in the list is the winner.
  const Formula &F = Formulae.front();

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
  Rewriter.setPostInc(PostIncLoop);

  // This is the type that the user actually needs.
  const Type *OpTy = OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  const Type *Ty = F.getType();
  if (!Ty)
    // No type known; just expand directly to the ultimate type.
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;
  // This is the type to do integer arithmetic in.
  const Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;

  // Expand the BaseRegs portion.
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg))
      if (AR->getLoop() == PostIncLoop)
        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));

    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = 0;
  if (F.AM.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
      if (AR->getLoop() == PostIncLoop)
        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));

    if (Kind == ICmpZero) {
      // An interesting way of "folding" with an icmp is to use a negated
      // scale, which we'll implement by inserting it into the other operand
      // of the icmp.
      assert(F.AM.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
      const Type *ScaledTy = SE.getEffectiveSCEVType(ScaledS->getType());
      ScaledS = SE.getMulExpr(ScaledS,
                              SE.getSCEV(ConstantInt::get(ScaledTy,
                                                          F.AM.Scale)));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the immediate portions.
  if (F.AM.BaseGV)
    Ops.push_back(SE.getSCEV(F.AM.BaseGV));
  if (F.AM.BaseOffs != 0) {
    if (Kind == ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
      if (!ICmpScaledV)
        ICmpScaledV = ConstantInt::get(IntTy, -F.AM.BaseOffs);
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, F.AM.BaseOffs);
      }
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
      Ops.push_back(SE.getSCEV(ConstantInt::get(IntTy, F.AM.BaseOffs)));
    }
  }

  // Emit instructions summing all the operands.
  const SCEV *FullS = Ops.empty() ?
                      SE.getIntegerSCEV(0, IntTy) :
                      SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);

  // We're done expanding now, so reset the rewriter.
  Rewriter.setPostInc(0);

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
  // form, update the ICmp's other operand.
  if (Kind == ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(UserInst);
    DeadInsts.push_back(CI->getOperand(1));
    assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
                           "a scale at the same time!");
    if (F.AM.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
                                                   OpTy, false),
                           ICmpScaledV, OpTy, "tmp", CI);
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      assert(F.AM.Scale == 0 &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)F.AM.BaseOffs);
      if (C->getType() != OpTy)
        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                          OpTy, false),
                                  C, OpTy);

      CI->setOperand(1, C);
    }
  }

  return FullV;
}

/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRUse::Rewrite(Loop *L, SCEVExpander &Rewriter,
                     SmallVectorImpl<WeakVH> &DeadInsts,
                     ScalarEvolution &SE, DominatorTree &DT,
                     Pass *P) const {
  const Type *OpTy = OperandValToReplace->getType();

  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    DenseMap<BasicBlock *, Value *> Inserted;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace) {
        BasicBlock *BB = PN->getIncomingBlock(i);

        // If this is a critical edge, split the edge so that we do not insert
        // the code on all predecessor/successor paths. We do this unless this
        // is the canonical backedge for this loop, which complicates post-inc
        // users.
        if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
            !isa<IndirectBrInst>(BB->getTerminator()) &&
            (PN->getParent() != L->getHeader() || !L->contains(BB))) {
          // Split the critical edge.
          BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);

          // If PN is outside of the loop and BB is in the loop, we want to
          // move the block to be immediately before the PHI block, not
          // immediately after BB.
          if (L->contains(BB) && !L->contains(PN))
            NewBB->moveBefore(PN->getParent());

          // Splitting the edge can reduce the number of PHI entries we have.
          e = PN->getNumIncomingValues();
          BB = NewBB;
          i = PN->getBasicBlockIndex(BB);
        }

        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
        if (!Pair.second)
          PN->setIncomingValue(i, Pair.first->second);
        else {
          Value *FullV = Expand(BB->getTerminator(),
                                L, Rewriter, DeadInsts, SE, DT);

          // If this is reuse-by-noop-cast, insert the noop cast.
          if (FullV->getType() != OpTy)
            FullV =
              CastInst::Create(CastInst::getCastOpcode(FullV, false,
                                                       OpTy, false),
                               FullV, OperandValToReplace->getType(),
                               "tmp", BB->getTerminator());

          PN->setIncomingValue(i, FullV);
          Pair.first->second = FullV;
        }
      }
  } else {
    Value *FullV = Expand(UserInst, L, Rewriter, DeadInsts, SE, DT);

    // If this is reuse-by-noop-cast, insert the noop cast.
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", UserInst);
      FullV = Cast;
    }

    // Update the user.
    UserInst->replaceUsesOfWith(OperandValToReplace, FullV);
  }

  DeadInsts.push_back(OperandValToReplace);
}

void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  switch (Kind) {
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
  case Address:
    OS << "Address of ";
    if (isa<PointerType>(AccessTy))
      OS << "pointer"; // the full pointer type could be really verbose
    else
      OS << *AccessTy;
  }

  OS << ", UserInst=";
  // Store is common and interesting enough to be worth special-casing.
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    OS << "store ";
    WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
  else
    WriteAsOperand(OS, UserInst, /*PrintType=*/false);

  OS << ", OperandValToReplace=";
  WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);

  if (PostIncLoop) {
    OS << ", PostIncLoop=";
    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
  }
}

void LSRUse::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// Score - This class is used to measure and compare candidate formulae.
class Score {
  unsigned NumRegs;
  unsigned NumPhis;
  unsigned NumIVMuls;
  unsigned NumBaseAdds;
  unsigned NumImms;

public:
  Score()
    : NumRegs(0), NumPhis(0), NumIVMuls(0), NumBaseAdds(0), NumImms(0) {}

  void RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                   ScalarEvolution &SE);

  void Rate(const SCEV *Reg, const SmallBitVector &Bits,
            const SmallVector<LSRUse, 16> &Uses, const Loop *L,
            ScalarEvolution &SE);

  bool operator<(const Score &Other) const;

  void print_details(raw_ostream &OS, const SCEV *Reg,
                     const SmallPtrSet<const SCEV *, 8> &Regs) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  void RateRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 8> &Regs,
                    const Loop *L);
  void RateFormula(const Formula &F, SmallPtrSet<const SCEV *, 8> &Regs,
                   const Loop *L);

  void Loose();
};

}

/// RateRegister - Tally up interesting quantities from the given register.
void Score::RateRegister(const SCEV *Reg,
                         SmallPtrSet<const SCEV *, 8> &Regs,
                         const Loop *L) {
  if (Regs.insert(Reg))
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      NumPhis += AR->getLoop() == L;

      // Add the step value register, if it needs one.
      if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
        RateRegister(AR->getOperand(1), Regs, L);
    }
}

void Score::RateFormula(const Formula &F,
                        SmallPtrSet<const SCEV *, 8> &Regs,
                        const Loop *L) {
  // Tally up the registers.
  if (F.ScaledReg)
    RateRegister(F.ScaledReg, Regs, L);
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I) {
    const SCEV *BaseReg = *I;
    RateRegister(BaseReg, Regs, L);

    NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
                 BaseReg->hasComputableLoopEvolution(L);
  }

  if (F.BaseRegs.size() > 1)
    NumBaseAdds += F.BaseRegs.size() - 1;

  // Tally up the non-zero immediates.
  if (F.AM.BaseGV || F.AM.BaseOffs != 0)
    ++NumImms;
}

/// Loose - Set this score to a losing value.
void Score::Loose() {
  NumRegs = ~0u;
  NumPhis = ~0u;
  NumIVMuls = ~0u;
  NumBaseAdds = ~0u;
  NumImms = ~0u;
}

/// RateInitial - Compute a score for the initial "fully reduced" solution.
void Score::RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                        ScalarEvolution &SE) {
  SmallPtrSet<const SCEV *, 8> Regs;
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I)
    RateFormula(I->Formulae.front(), Regs, L);
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), 0, Regs));
}

/// Rate - Compute a score for the solution where the reuse associated with
/// putting Reg in a register is selected.
void Score::Rate(const SCEV *Reg, const SmallBitVector &Bits,
                 const SmallVector<LSRUse, 16> &Uses, const Loop *L,
                 ScalarEvolution &SE) {
  SmallPtrSet<const SCEV *, 8> Regs;
  for (size_t i = 0, e = Uses.size(); i != e; ++i) {
    const LSRUse &LU = Uses[i];

    const Formula *BestFormula = 0;
    if (i >= Bits.size() || !Bits.test(i))
      // This use doesn't use the current register. Just go with the current
      // leading candidate formula.
      BestFormula = &LU.Formulae.front();
    else
      // Find the best formula for this use that uses the current register.
      for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
           E = LU.Formulae.end(); I != E; ++I) {
        const Formula &F = *I;
        if (F.referencesReg(Reg) &&
            (!BestFormula || ComplexitySorter()(F, *BestFormula)))
          BestFormula = &F;
      }

    // If we didn't find *any* formulae, because earlier we eliminated some
    // in greedy fashion, skip the current register's reuse opportunity.
    if (!BestFormula) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " wouldn't help any users.\n");
      Loose();
      return;
    }

    // For an in-loop post-inc user, don't allow multiple base registers,
    // because that would require an awkward in-loop add after the increment.
    if (LU.PostIncLoop && LU.PostIncLoop->contains(LU.UserInst) &&
        BestFormula->BaseRegs.size() > 1) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " would require an in-loop post-inc add: ";
            BestFormula->dump());
      Loose();
      return;
    }

    RateFormula(*BestFormula, Regs, L);
  }
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), Reg, Regs));
}

/// operator< - Choose the better score.
bool Score::operator<(const Score &Other) const {
  if (NumRegs != Other.NumRegs)
    return NumRegs < Other.NumRegs;
  if (NumPhis != Other.NumPhis)
    return NumPhis < Other.NumPhis;
  if (NumIVMuls != Other.NumIVMuls)
    return NumIVMuls < Other.NumIVMuls;
  if (NumBaseAdds != Other.NumBaseAdds)
    return NumBaseAdds < Other.NumBaseAdds;
  return NumImms < Other.NumImms;
}

void Score::print_details(raw_ostream &OS,
                          const SCEV *Reg,
                          const SmallPtrSet<const SCEV *, 8> &Regs) const {
  if (Reg) OS << "Reuse with reg " << *Reg << " would require ";
  else OS << "The initial solution would require ";
  print(OS);
  OS << ". Regs:";
  for (SmallPtrSet<const SCEV *, 8>::const_iterator I = Regs.begin(),
       E = Regs.end(); I != E; ++I)
    OS << ' ' << **I;
  OS << '\n';
}

void Score::print(raw_ostream &OS) const {
  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
  if (NumPhis != 0)
    OS << ", including " << NumPhis << " PHI" << (NumPhis == 1 ? "" : "s");
  if (NumIVMuls != 0)
    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
  if (NumBaseAdds != 0)
    OS << ", plus " << NumBaseAdds << " base add"
       << (NumBaseAdds == 1 ? "" : "s");
  if (NumImms != 0)
    OS << ", plus " << NumImms << " imm" << (NumImms == 1 ? "" : "s");
}

void Score::dump() const {
  print(errs()); errs() << '\n';
}

/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getOperand(1) == OperandVal)
      isAddress = true;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::prefetch:
      case Intrinsic::x86_sse2_loadu_dq:
      case Intrinsic::x86_sse2_loadu_pd:
      case Intrinsic::x86_sse_loadu_ps:
      case Intrinsic::x86_sse_storeu_ps:
      case Intrinsic::x86_sse2_storeu_pd:
      case Intrinsic::x86_sse2_storeu_dq:
      case Intrinsic::x86_sse2_storel_dq:
        if (II->getOperand(1) == OperandVal)
          isAddress = true;
        break;
    }
  }
  return isAddress;
}

/// getAccessType - Return the type of the memory being accessed.
static const Type *getAccessType(const Instruction *Inst) {
  const Type *AccessTy = Inst->getType();
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
    AccessTy = SI->getOperand(0)->getType();
  else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      AccessTy = II->getOperand(1)->getType();
      break;
    }
  }
  return AccessTy;
}
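
// For example (illustrative; %d and %p are hypothetical): for
// "store double %d, double* %p" the access type is double, the type of the
// stored value rather than the pointer type, and for a load it is simply
// the load's result type.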

/// DeleteTriviallyDeadInstructions - If any of the instructions in the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
1346static bool
1347DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
1348 bool Changed = false;
Jim Grosbach56a1f802009-11-17 17:53:56 +00001349
Dan Gohmana10756e2010-01-21 02:09:26 +00001350 while (!DeadInsts.empty()) {
1351 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
Nate Begeman16997482005-07-30 00:15:07 +00001352
Dan Gohmana10756e2010-01-21 02:09:26 +00001353 if (I == 0 || !isInstructionTriviallyDead(I))
Evan Cheng55e641b2008-03-19 22:02:26 +00001354 continue;
Jim Grosbach56a1f802009-11-17 17:53:56 +00001355
Dan Gohmana10756e2010-01-21 02:09:26 +00001356 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
1357 if (Instruction *U = dyn_cast<Instruction>(*OI)) {
1358 *OI = 0;
1359 if (U->use_empty())
1360 DeadInsts.push_back(U);
Dan Gohman2acc7602007-05-03 23:20:33 +00001361 }
Dan Gohman02e4fa72007-10-22 20:40:42 +00001362
Dan Gohmana10756e2010-01-21 02:09:26 +00001363 I->eraseFromParent();
Dan Gohmanafc36a92009-05-02 18:29:22 +00001364 Changed = true;
Evan Chengcdf43b12007-10-25 09:11:16 +00001365 }
1366
Dan Gohmana10756e2010-01-21 02:09:26 +00001367 return Changed;
Evan Chengcdf43b12007-10-25 09:11:16 +00001368}
1369
Dan Gohmana10756e2010-01-21 02:09:26 +00001370namespace {
Dan Gohmanad7321f2008-09-15 21:22:06 +00001371
Dan Gohmana10756e2010-01-21 02:09:26 +00001372/// LSRInstance - This class holds state for the main loop strength
1373/// reduction logic.
1374class LSRInstance {
1375 IVUsers &IU;
1376 ScalarEvolution &SE;
1377 DominatorTree &DT;
1378 const TargetLowering *const TLI;
1379 Loop *const L;
1380 bool Changed;
Dan Gohmanad7321f2008-09-15 21:22:06 +00001381
Dan Gohmana10756e2010-01-21 02:09:26 +00001382 /// CurrentArbitraryRegIndex - To ensure a deterministic ordering, assign an
1383 /// arbitrary index value to each register as a sort tie breaker.
1384 unsigned CurrentArbitraryRegIndex;
Dan Gohmanad7321f2008-09-15 21:22:06 +00001385
Dan Gohmana10756e2010-01-21 02:09:26 +00001386 /// Factors - Interesting factors between use strides.
1387 SmallSetVector<int64_t, 4> Factors;
Dan Gohmanad7321f2008-09-15 21:22:06 +00001388
Dan Gohmana10756e2010-01-21 02:09:26 +00001389 /// Types - Interesting use types, to facilitate truncation reuse.
1390 SmallSetVector<const Type *, 4> Types;
Dan Gohmanad7321f2008-09-15 21:22:06 +00001391
Dan Gohmana10756e2010-01-21 02:09:26 +00001392 /// Uses - The list of interesting uses.
1393 SmallVector<LSRUse, 16> Uses;
Dan Gohman4d1c1ef2009-06-19 23:03:46 +00001394
Dan Gohmana10756e2010-01-21 02:09:26 +00001395 // TODO: Reorganize these data structures.
1396 typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
1397 RegUsesTy RegUses;
1398 SmallVector<const SCEV *, 16> RegSequence;
Dan Gohmanad7321f2008-09-15 21:22:06 +00001399
Dan Gohmana10756e2010-01-21 02:09:26 +00001400 void OptimizeShadowIV();
1401 bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
1402 const SCEV* &CondStride);
1403 ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
1404 bool StrideMightBeShared(const SCEV* Stride);
1405 bool OptimizeLoopTermCond(Instruction *&IVIncInsertPos);
Dan Gohmanad7321f2008-09-15 21:22:06 +00001406
Dan Gohmana10756e2010-01-21 02:09:26 +00001407 LSRUse &getNewUse() {
1408 Uses.push_back(LSRUse());
1409 return Uses.back();
1410 }
Dan Gohmanbc10b8c2009-03-04 20:49:01 +00001411
Dan Gohmana10756e2010-01-21 02:09:26 +00001412 void CountRegister(const SCEV *Reg, uint32_t Complexity, size_t LUIdx);
1413 void CountRegisters(const Formula &F, size_t LUIdx);
Dan Gohmanad7321f2008-09-15 21:22:06 +00001414
Dan Gohmana10756e2010-01-21 02:09:26 +00001415 void GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
1416 Formula Base);
1417 void GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
1418 Formula Base);
1419 void GenerateFormulaeFromReplacedBaseReg(LSRUse &LU,
1420 unsigned LUIdx,
1421 const Formula &Base, unsigned i,
1422 const SmallVectorImpl<const SCEV *>
1423 &AddOps);
1424 void GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
1425 Formula Base);
1426 void GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
1427 Formula Base);
1428 void GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
1429 Formula Base);
1430 void GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
1431 Formula Base);
Dan Gohman2781f302009-06-19 23:23:27 +00001432
Dan Gohmana10756e2010-01-21 02:09:26 +00001433 void GenerateConstantOffsetReuse();
Dan Gohmanad7321f2008-09-15 21:22:06 +00001434
Dan Gohmana10756e2010-01-21 02:09:26 +00001435 void GenerateAllReuseFormulae();
1436
1437 void GenerateLoopInvariantRegisterUses();
1438
1439public:
1440 LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
1441
1442 bool getChanged() const { return Changed; }
1443
1444 void print(raw_ostream &OS) const;
1445 void dump() const;
1446};
1447
Dan Gohmanad7321f2008-09-15 21:22:06 +00001448}
1449
/// OptimizeShadowIV - If IV is used in an int-to-float cast
/// inside the loop then try to eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (size_t StrideIdx = 0, e = IU.StrideOrder.size();
       StrideIdx != e; ++StrideIdx) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
    if (!isa<SCEVConstant>(SI->first))
      continue;

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; /* empty */) {
      ilist<IVStrideUse>::iterator CandidateUI = UI;
      ++UI;
      Instruction *ShadowUse = CandidateUI->getUser();
      const Type *DestTy = NULL;

      /* If the shadow use is an int->float cast, insert a second IV
         to eliminate this cast:

           for (unsigned i = 0; i < n; ++i)
             foo((double)i);

         is transformed into

           double d = 0.0;
           for (unsigned i = 0; i < n; ++i, ++d)
             foo(d);
      */
      if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
        DestTy = UCast->getDestTy();
      else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
        DestTy = SCast->getDestTy();
      if (!DestTy) continue;

      if (TLI) {
        // If the target does not support DestTy natively, do not apply
        // this transformation.
        EVT DVT = TLI->getValueType(DestTy);
        if (!TLI->isTypeLegal(DVT)) continue;
      }

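      // The shadow IV can only be introduced for a simple recurrence: the
      // candidate must feed a PHI with exactly two incoming values, one from
      // the preheader and one from the latch.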
      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
      if (!PH) continue;
      if (PH->getNumIncomingValues() != 2) continue;

      const Type *SrcTy = PH->getType();
      int Mantissa = DestTy->getFPMantissaWidth();
      if (Mantissa == -1) continue;
      if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
        continue;

      unsigned Entry, Latch;
      if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
        Entry = 0;
        Latch = 1;
      } else {
        Entry = 1;
        Latch = 0;
      }

      ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
      if (!Init) continue;
      Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());

      BinaryOperator *Incr =
        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
      if (!Incr) continue;
      if (Incr->getOpcode() != Instruction::Add
          && Incr->getOpcode() != Instruction::Sub)
        continue;

      /* Initialize new IV, double d = 0.0 in above example. */
      ConstantInt *C = NULL;
      if (Incr->getOperand(0) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(1));
      else if (Incr->getOperand(1) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(0));
      else
        continue;

      if (!C) continue;

      // Ignore negative constants, as the code below doesn't handle them
      // correctly. TODO: Remove this restriction.
      if (!C->getValue().isStrictlyPositive()) continue;

      /* Add new PHINode. */
      PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);

      /* Create the new increment: '++d' in the above example. */
      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
      BinaryOperator *NewIncr =
        BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
                                 Instruction::FAdd : Instruction::FSub,
                               NewPH, CFP, "IV.S.next.", Incr);

      NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
      NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));

      /* Remove cast operation */
      ShadowUse->replaceAllUsesWith(NewPH);
      ShadowUse->eraseFromParent();
      break;
    }
  }
}

/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
                                    IVStrideUse *&CondUse,
                                    const SCEV* &CondStride) {
  for (unsigned StrideIdx = 0, e = IU.StrideOrder.size();
       StrideIdx != e && !CondUse; ++StrideIdx) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; ++UI)
      if (UI->getUser() == Cond) {
        // NOTE: We could handle setcc instructions with multiple uses here,
        // but InstCombine does it as well for simple uses, and it's not clear
        // that it occurs often enough in real life to be worth handling.
        CondUse = UI;
        CondStride = SI->first;
        return true;
      }
  }
  return false;
}

/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       p[i] = 0.0;
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     p[i] = 0.0;
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting such loops, rewriting
/// their conditions from ICMP_NE back to ICMP_SLT, and deleting the
/// instructions for the maximum computation.
///
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType());

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);

  // Check for a max calculation that matches the pattern.
  if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
    return Cond;
  const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
  if (Max != SE.getSCEV(Sel)) return Cond;

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

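  // In the max produced by indvars for this pattern (max = n < 1 ? 1 : n),
  // the constant 1 appears as the first operand, since SCEV sorts constants
  // to the front.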
  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
  if (!MaxLHS || MaxLHS != One) return Cond;
  // Check the relevant induction variable for conformance to
  // the pattern.
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = 0;
  if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  if (!NewRHS) return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  CmpInst::Predicate Pred =
    isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond =
    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}

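/// StrideMightBeShared - Return true if some other interesting stride is
/// equal to, or an exact multiple of, the given constant stride and has
/// users of the pre-incremented value for which the implied scale would be
/// legal, in which case this stride's IV might profitably be shared.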
bool LSRInstance::StrideMightBeShared(const SCEV* Stride) {
  int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
  for (unsigned i = 0, e = IU.StrideOrder.size(); i != e; ++i) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[i]);
    const SCEV *Share = SI->first;
    if (!isa<SCEVConstant>(SI->first) || Share == Stride)
      continue;
    int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
    if (SSInt == SInt)
      return true; // This can definitely be reused.
    if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
      continue;
    int64_t Scale = SSInt / SInt;

    // This AM will be used for conservative queries. At this point in the
    // process we don't know which users will have a base reg, immediate,
    // etc., so we conservatively assume they may not, which makes more
    // strides valid, thus erring on the side of assuming that there
    // might be sharing.
    TargetLowering::AddrMode AM;
    AM.Scale = Scale;

    // Any pre-inc iv use?
    IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[Share];
    for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
         E = StrideUses.Users.end(); I != E; ++I) {
      bool isAddress = isAddressUse(I->getUser(), I->getOperandValToReplace());
      if (!I->isUseOfPostIncrementedValue() &&
          isLegalUse(AM, isAddress ? LSRUse::Address : LSRUse::Basic,
                     isAddress ? getAccessType(I->getUser()) : 0,
                     TLI))
        return true;
    }
  }
  return false;
}

/// OptimizeLoopTermCond - Change the loop's terminating condition to use the
/// post-inc iv when possible.
bool
LSRInstance::OptimizeLoopTermCond(Instruction *&IVIncInsertPos) {
  SmallPtrSet<Instruction *, 4> PostIncs;

  BasicBlock *LatchBlock = L->getLoopLatch();
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BasicBlock *ExitingBlock = ExitingBlocks[i];

    // Get the terminating condition for the loop if possible. If we
    // can, we want to change it to use a post-incremented version of its
    // induction variable, to allow coalescing the live ranges for the IV into
    // one register value.

    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
    if (!TermBr)
      continue;
    // FIXME: Overly conservative, termination condition could be an 'or' etc..
    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
      continue;

    // Search IVUsesByStride to find Cond's IVUse if there is one.
    IVStrideUse *CondUse = 0;
    const SCEV *CondStride = 0;
    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
    if (!FindIVUserForCond(Cond, CondUse, CondStride))
      continue;

    // If the trip count is computed in terms of a max (due to ScalarEvolution
    // being unable to find a sufficient guard, for example), change the loop
    // comparison to use SLT or ULT instead of NE.
    // One consequence of doing this now is that it disrupts the count-down
    // optimization. That's not always a bad thing though, because in such
    // cases it may still be worthwhile to avoid a max.
    Cond = OptimizeMax(Cond, CondUse);

    // If this exiting block is the latch block, and the condition has only
    // one use inside the loop (the branch), then use the post-incremented
    // value of the induction variable.
    if (ExitingBlock != LatchBlock) {
      // If this exiting block dominates the latch block, it may also use
      // the post-inc value if it won't be shared with other uses.
      // Check for dominance.
      if (!DT.dominates(ExitingBlock, LatchBlock))
        continue;
      // Check for sharing within the same stride.
      bool SameStrideSharing = false;
      IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[CondStride];
      for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
           E = StrideUses.Users.end(); I != E; ++I) {
        if (I->getUser() == Cond)
          continue;
        if (!I->isUseOfPostIncrementedValue()) {
          SameStrideSharing = true;
          break;
        }
      }
      if (SameStrideSharing)
        continue;
      // Check for sharing from a different stride.
      if (isa<SCEVConstant>(CondStride) && StrideMightBeShared(CondStride))
        continue;
    }
    if (!Cond->hasOneUse()) {
      bool HasOneUseInLoop = true;
      for (Value::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
           UI != UE; ++UI) {
        Instruction *U = cast<Instruction>(*UI);
        if (U == TermBr)
          continue;
        if (L->contains(U)) {
          HasOneUseInLoop = false;
          break;
        }
      }
      if (!HasOneUseInLoop)
        continue;
    }

    DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
                 << *Cond << '\n');

    // It's possible for the setcc instruction to be anywhere in the loop, and
    // possible for it to have multiple users. If it is not immediately before
    // the exiting block branch, move it.
    if (&*++BasicBlock::iterator(Cond) != TermBr) {
      if (Cond->hasOneUse()) { // Condition has a single use, just move it.
        Cond->moveBefore(TermBr);
      } else {
        // Otherwise, clone the terminating condition and insert into the
        // loopend.
        ICmpInst *OldCond = Cond;
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");
        ExitingBlock->getInstList().insert(TermBr, Cond);

        // Clone the IVUse, as the old use still exists!
        IU.IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
                                          CondUse->getOperandValToReplace());
        CondUse = &IU.IVUsesByStride[CondStride]->Users.back();
        TermBr->replaceUsesOfWith(OldCond, Cond);
      }
    }

    // If we get to here, we know that we can transform the setcc instruction to
    // use the post-incremented version of the IV, allowing us to coalesce the
    // live ranges for the IV correctly.
    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), CondStride));
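    // The post-incremented value is one stride beyond the pre-incremented
    // value, so compensate by folding a -stride adjustment into the offset.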
    CondUse->setIsUseOfPostIncrementedValue(true);
    Changed = true;

    PostIncs.insert(Cond);
  }

  // Determine an insertion point for the loop induction variable increment. It
  // must dominate all the post-inc comparisons we just set up, and it must
  // dominate the loop latch edge.
  IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (SmallPtrSet<Instruction *, 4>::iterator I = PostIncs.begin(),
       E = PostIncs.end(); I != E; ++I) {
    BasicBlock *BB =
      DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
                                    (*I)->getParent());
    if (BB == (*I)->getParent())
      IVIncInsertPos = *I;
    else if (BB != IVIncInsertPos->getParent())
      IVIncInsertPos = BB->getTerminator();
  }

  return Changed;
}

/// CountRegister - Note the given register, updating its usage information.
void LSRInstance::CountRegister(const SCEV *Reg, uint32_t Complexity,
                                size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUses.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &BV = Pair.first->second;
  if (Pair.second) {
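    // First sighting of this register: assign it the next index so that
    // RegSequence preserves discovery order.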
    BV.Index = CurrentArbitraryRegIndex++;
    BV.MaxComplexity = Complexity;
    RegSequence.push_back(Reg);
  } else {
    BV.MaxComplexity = std::max(BV.MaxComplexity, Complexity);
  }
  BV.Bits.resize(std::max(BV.Bits.size(), LUIdx + 1));
  BV.Bits.set(LUIdx);
}

/// CountRegisters - Note which registers are used by the given formula,
/// updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  uint32_t Complexity = F.getComplexity();
  if (F.ScaledReg)
    CountRegister(F.ScaledReg, Complexity, LUIdx);
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I)
    CountRegister(*I, Complexity, LUIdx);
}

/// GenerateSymbolicOffsetReuse - Generate reuse formulae using symbolic
/// offsets.
void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
                                              Formula Base) {
  // We can't add a symbolic offset if the address already contains one.
  if (Base.AM.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *G = Base.BaseRegs[i];
    GlobalValue *GV = ExtractSymbol(G, SE);
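    // ExtractSymbol strips any GlobalValue summand out of G in place; if the
    // remainder is zero, there's no register left to pair with the symbol.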
    if (G->isZero())
      continue;
    Formula F = Base;
    F.AM.BaseGV = GV;
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
      continue;
    F.BaseRegs[i] = G;
    if (LU.InsertFormula(F))
      CountRegisters(LU.Formulae.back(), LUIdx);
  }
}

/// GenerateICmpZeroScaledReuse - For ICmpZero, check to see if we can scale up
/// the comparison. For example, x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
                                              Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  const Type *IntTy = Base.getType();
  if (!IntTy) return;
  if (SE.getTypeSizeInBits(IntTy) > 64) return;
  IntTy = SE.getEffectiveSCEVType(IntTy);

  assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;
    Formula F = Base;

    // Check that the multiplication doesn't overflow.
    F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs * Factor;
    if ((int64_t)F.AM.BaseOffs / Factor != F.AM.BaseOffs)
      continue;

    // Check that this scale is legal.
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
      continue;

    const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));

    // Check that multiplying with each base register doesn't overflow.
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // If we make it here and it's legal, add it.
    if (LU.InsertFormula(F))
      CountRegisters(LU.Formulae.back(), LUIdx);
  next:;
  }
}

/// GenerateFormulaeFromReplacedBaseReg - If removing the base register with
/// index i from the BaseRegs list and adding the registers in AddOps
/// in its place forms an interesting formula, pursue it.
void
LSRInstance::GenerateFormulaeFromReplacedBaseReg(
                                             LSRUse &LU,
                                             unsigned LUIdx,
                                             const Formula &Base, unsigned i,
                                             const SmallVectorImpl<const SCEV *>
                                               &AddOps) {
  if (AddOps.empty()) return;

  Formula F = Base;
  std::swap(F.BaseRegs[i], F.BaseRegs.back());
  F.BaseRegs.pop_back();
  for (SmallVectorImpl<const SCEV *>::const_iterator I = AddOps.begin(),
       E = AddOps.end(); I != E; ++I)
    F.BaseRegs.push_back(*I);
  F.AM.HasBaseReg = !F.BaseRegs.empty();
  if (LU.InsertFormula(F)) {
    CountRegisters(LU.Formulae.back(), LUIdx);
    // Recurse.
    GenerateReassociationReuse(LU, LUIdx, LU.Formulae.back());
  }
}

/// GenerateReassociationReuse - Split out subexpressions from adds and
/// the bases of addrecs.
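/// For example, reg(a + b + c) can be split into reg(a) plus reg(b + c),
/// and reg({(a + b),+,s}) into reg(a) plus reg({b,+,s}).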
void LSRInstance::GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
                                             Formula Base) {
  SmallVector<const SCEV *, 8> AddOps;
  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *BaseReg = Base.BaseRegs[i];
    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BaseReg)) {
      for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
           J != JE; ++J) {
        // Don't pull a constant into a register if the constant could be
        // folded into an immediate field.
        if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE)) continue;
        SmallVector<const SCEV *, 8> InnerAddOps;
        for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
          if (K != J)
            InnerAddOps.push_back(*K);
        // Splitting a 2-operand add both ways is redundant. Pruning this
        // now saves compile time.
        if (InnerAddOps.size() < 2 && next(J) == JE)
          continue;
        AddOps.push_back(*J);
        const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
        AddOps.push_back(InnerAdd);
        GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
        AddOps.clear();
      }
    } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BaseReg)) {
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(AR->getStart())) {
        for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
             J != JE; ++J) {
          // Don't pull a constant into a register if the constant could be
          // folded into an immediate field.
          if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE))
            continue;
          SmallVector<const SCEV *, 8> InnerAddOps;
          for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
            if (K != J)
              InnerAddOps.push_back(*K);
          AddOps.push_back(*J);
          const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
          AddOps.push_back(SE.getAddRecExpr(InnerAdd,
                                            AR->getStepRecurrence(SE),
                                            AR->getLoop()));
          GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
          AddOps.clear();
        }
      } else if (!isAlwaysFoldable(AR->getStart(), Base.BaseRegs.size() > 1,
                                   LU.Kind, LU.AccessTy,
                                   TLI, SE)) {
        AddOps.push_back(AR->getStart());
        AddOps.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0,
                                                         BaseReg->getType()),
                                          AR->getStepRecurrence(SE),
                                          AR->getLoop()));
        GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
        AddOps.clear();
      }
    }
  }
}

/// GenerateCombinationReuse - Generate a formula consisting of all of the
/// loop-invariant, loop-dominating registers added into a single register.
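/// For example, with base registers {x, y, {0,+,1}} where x and y dominate
/// the loop and have no evolution in it, this forms {(x + y), {0,+,1}}.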
void LSRInstance::GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
                                           Formula Base) {
  if (Base.BaseRegs.size() <= 1) return;

  Formula F = Base;
  F.BaseRegs.clear();
  SmallVector<const SCEV *, 4> Ops;
  for (SmallVectorImpl<const SCEV *>::const_iterator
       I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
    const SCEV *BaseReg = *I;
    if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
        !BaseReg->hasComputableLoopEvolution(L))
      Ops.push_back(BaseReg);
    else
      F.BaseRegs.push_back(BaseReg);
  }
  if (Ops.size() > 1) {
    F.BaseRegs.push_back(SE.getAddExpr(Ops));
    if (LU.InsertFormula(F))
      CountRegisters(LU.Formulae.back(), LUIdx);
  }
}

/// GenerateScaledReuse - Generate stride factor reuse formulae by making
/// use of scaled-offset address modes, for example.
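/// For example, given a use of {a,+,4} and an interesting factor of 4, this
/// can produce a formula with scaled register {a/4,+,1} and AM.Scale = 4.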
void LSRInstance::GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
                                      Formula Base) {
  // Determine the integer type for the base formula.
  const Type *IntTy = Base.getType();
  if (!IntTy) return;
  IntTy = SE.getEffectiveSCEVType(IntTy);

  // Check each interesting stride.
  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;

    // If this Formula already has a scaled register, we can't add another one.
    if (Base.AM.Scale != 0)
      continue;
    Formula F = Base;
    F.AM.Scale = Factor;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI)) {
      // As a special case, handle out-of-loop Basic users by reclassifying
      // them as Special. TODO: Reconsider this special case.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(F.AM, LSRUse::Special, LU.AccessTy, TLI) &&
          !L->contains(LU.UserInst))
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For each addrec base reg, apply the scale, if possible.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
      if (const SCEVAddRecExpr *AR =
            dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
        const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
          // TODO: This could be optimized to avoid all the copying.
          Formula NewF = F;
          NewF.ScaledReg = Quotient;
          std::swap(NewF.BaseRegs[i], NewF.BaseRegs.back());
          NewF.BaseRegs.pop_back();
          NewF.AM.HasBaseReg = !NewF.BaseRegs.empty();
          if (LU.InsertFormula(NewF))
            CountRegisters(LU.Formulae.back(), LUIdx);
        }
      }
  }
}

/// GenerateTruncateReuse - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
                                        Formula Base) {
  // This requires TargetLowering to tell us which truncates are free.
  if (!TLI) return;

  // Don't attempt to truncate symbolic values.
  if (Base.AM.BaseGV) return;

  // Determine the integer type for the base formula.
  const Type *DstTy = Base.getType();
  if (!DstTy) return;
  DstTy = SE.getEffectiveSCEVType(DstTy);

  for (SmallSetVector<const Type *, 4>::const_iterator
       I = Types.begin(), E = Types.end(); I != E; ++I) {
    const Type *SrcTy = *I;
    if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
      Formula F = Base;
      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
      for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
           JE = F.BaseRegs.end(); J != JE; ++J)
        *J = SE.getAnyExtendExpr(*J, SrcTy);
      if (LU.InsertFormula(F))
        CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }
}

namespace {

/// WorkItem - Helper class for GenerateConstantOffsetReuse. It's used to
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
  LSRUse *LU;
  size_t LUIdx;
  int64_t Imm;
  const SCEV *OrigReg;

  WorkItem(LSRUse *U, size_t LI, int64_t I, const SCEV *R)
    : LU(U), LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

void WorkItem::print(raw_ostream &OS) const {
  OS << "in use ";
  LU->print(OS);
  OS << " (at index " << LUIdx << "), add offset " << Imm
     << " and compensate by adjusting references to " << *OrigReg << "\n";
}

void WorkItem::dump() const {
  print(errs()); errs() << '\n';
}

}

/// GenerateConstantOffsetReuse - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
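/// For example, if reg(x) and reg(x + 4) are both interesting, a use of
/// reg(x + 4) can instead be formulated as reg(x) plus an immediate offset
/// of 4, allowing the two uses to share a register.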
void LSRInstance::GenerateConstantOffsetReuse() {
  // Group the registers by their value without any added constant offset.
  typedef std::map<int64_t, const SCEV *> ImmMapTy;
  typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
  RegMapTy Map;
  SmallVector<const SCEV *, 8> Sequence;
  for (SmallVectorImpl<const SCEV *>::iterator I = RegSequence.begin(),
       E = RegSequence.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    int64_t Imm = ExtractImmediate(Reg, SE);
    std::pair<RegMapTy::iterator, bool> Pair =
      Map.insert(std::make_pair(Reg, ImmMapTy()));
    if (Pair.second)
      Sequence.push_back(Reg);
    Pair.first->second.insert(std::make_pair(Imm, *I));
  }

  // Insert an artificial expression at offset 0 (if there isn't one already),
  // as this may lead to more reuse opportunities.
  for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
       E = Sequence.end(); I != E; ++I)
    Map.find(*I)->second.insert(ImmMapTy::value_type(0, 0));

  // Now examine each set of registers with the same base value. Build up
  // a list of work to do and do the work in a separate step so that we're
  // not adding formulae and register counts while we're searching.
  SmallVector<WorkItem, 32> WorkItems;
  for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
       E = Sequence.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    const ImmMapTy &Imms = Map.find(Reg)->second;
    // Examine each offset.
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         J != JE; ++J) {
      const SCEV *OrigReg = J->second;
      // Skip the artificial register at offset 0.
      if (!OrigReg) continue;

      int64_t JImm = J->first;
      const SmallBitVector &Bits = RegUses.find(OrigReg)->second.Bits;

      // Examine each other offset associated with the same register. This is
      // quadratic in the number of registers with the same base, but it's
      // uncommon for this to be a large number.
      for (ImmMapTy::const_iterator M = Imms.begin(); M != JE; ++M) {
        if (M == J) continue;

        // Compute the difference between the two.
        int64_t Imm = (uint64_t)JImm - M->first;
        for (int LUIdx = Bits.find_first(); LUIdx != -1;
             LUIdx = Bits.find_next(LUIdx))
          // Make a memo of this use, offset, and register tuple.
          WorkItems.push_back(WorkItem(&Uses[LUIdx], LUIdx, Imm, OrigReg));
      }
    }
  }

  // Now iterate through the worklist and add new formulae.
  for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
       E = WorkItems.end(); I != E; ++I) {
    const WorkItem &WI = *I;
    LSRUse &LU = *WI.LU;
    size_t LUIdx = WI.LUIdx;
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
    const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy,
                                                      -(uint64_t)Imm));

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      // Use the immediate in the scaled register.
      if (F.ScaledReg == OrigReg) {
        int64_t Offs = (uint64_t)F.AM.BaseOffs +
                       Imm * (uint64_t)F.AM.Scale;
        // Don't create 50 + reg(-50).
        if (F.referencesReg(SE.getSCEV(
                   ConstantInt::get(IntTy, -(uint64_t)Offs))))
          continue;
        Formula NewF = F;
        NewF.AM.BaseOffs = Offs;
        if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
          continue;
        const SCEV *Diff = SE.getAddExpr(NegImmS, NewF.ScaledReg);
        if (Diff->isZero()) continue;
        NewF.ScaledReg = Diff;
        if (LU.InsertFormula(NewF))
          CountRegisters(LU.Formulae.back(), LUIdx);
      }
      // Use the immediate in a base register.
      for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
        const SCEV *BaseReg = F.BaseRegs[N];
        if (BaseReg != OrigReg)
          continue;
        Formula NewF = F;
        NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
        if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
          continue;
        const SCEV *Diff = SE.getAddExpr(NegImmS, BaseReg);
        if (Diff->isZero()) continue;
        // Don't create 50 + reg(-50).
        if (Diff ==
            SE.getSCEV(ConstantInt::get(IntTy,
                                        -(uint64_t)NewF.AM.BaseOffs)))
          continue;
        NewF.BaseRegs[N] = Diff;
        if (LU.InsertFormula(NewF))
          CountRegisters(LU.Formulae.back(), LUIdx);
      }
    }
  }
}
2329
Dan Gohmana10756e2010-01-21 02:09:26 +00002330/// GenerateAllReuseFormulae - Generate formulae for each use.
2331void
2332LSRInstance::GenerateAllReuseFormulae() {
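  // Note that each generator captures the formula count before iterating, so
  // formulae it inserts are seen by the generators that run after it but are
  // not re-fed into the generator that created them.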
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsetReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScaledReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociationReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinationReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScaledReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncateReuse(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateConstantOffsetReuse();
}

/// GenerateLoopInvariantRegisterUses - Check for other uses of loop-invariant
/// values which we're tracking. These other uses will pin these values in
/// registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::GenerateLoopInvariantRegisterUses() {
  for (size_t i = 0, e = RegSequence.size(); i != e; ++i)
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(RegSequence[i])) {
      const Value *V = U->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V))
        if (L->contains(Inst)) continue;
      for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
           UI != UE; ++UI) {
        const Instruction *UserInst = dyn_cast<Instruction>(*UI);
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType()) &&
            !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
          continue;
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !UI.getOperandNo();
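          // An icmp has exactly two operands, so the other operand's index
          // is the logical negation of this use's operand number.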
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
            continue;
        }

        LSRUse &LU = getNewUse();
        LU.UserInst = const_cast<Instruction *>(UserInst);
        LU.OperandValToReplace = UI.getUse();
        LU.InsertSupplementalFormula(U);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
      }
    }
}

#ifndef NDEBUG

static void debug_winner(SmallVector<LSRUse, 16> const &Uses) {
  dbgs() << "LSR has selected formulae for each use:\n";
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    const LSRUse &LU = *I;
    dbgs() << " ";
    LU.print(dbgs());
    dbgs() << '\n';
    dbgs() << " ";
    LU.Formulae.front().print(dbgs());
    dbgs() << "\n";
  }
}

#endif

LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
  : IU(P->getAnalysis<IVUsers>()),
    SE(P->getAnalysis<ScalarEvolution>()),
    DT(P->getAnalysis<DominatorTree>()),
    TLI(tli), L(l), Changed(false), CurrentArbitraryRegIndex(0) {

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm()) return;

  // If there's no interesting work to be done, bail early.
  if (IU.IVUsesByStride.empty()) return;

  DEBUG(dbgs() << "\nLSR on loop ";
        WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
        dbgs() << ":\n");

  // Sort the StrideOrder so we process larger strides first.
  std::stable_sort(IU.StrideOrder.begin(), IU.StrideOrder.end(),
                   StrideCompare(SE));

  // If the IV is used in an int-to-float cast inside the loop, try to
  // eliminate the cast operation.
  OptimizeShadowIV();

  // Change loop terminating condition to use the postinc iv when possible.
  Instruction *IVIncInsertPos;
  Changed |= OptimizeLoopTermCond(IVIncInsertPos);

  for (SmallVectorImpl<const SCEV *>::const_iterator SIter =
       IU.StrideOrder.begin(), SEnd = IU.StrideOrder.end();
       SIter != SEnd; ++SIter) {
    const SCEV *Stride = *SIter;

    // Collect interesting types.
    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));

    // Collect interesting factors.
    for (SmallVectorImpl<const SCEV *>::const_iterator NewStrideIter =
         SIter + 1; NewStrideIter != SEnd; ++NewStrideIter) {
      const SCEV *OldStride = Stride;
      const SCEV *NewStride = *NewStrideIter;

      if (SE.getTypeSizeInBits(OldStride->getType()) !=
          SE.getTypeSizeInBits(NewStride->getType())) {
        if (SE.getTypeSizeInBits(OldStride->getType()) >
            SE.getTypeSizeInBits(NewStride->getType()))
          NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
        else
          OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
      }
      if (const SCEVConstant *Factor =
            dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
                                                   SE, true)))
        if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
          Factors.insert(Factor->getValue()->getValue().getSExtValue());
    }

    std::map<const SCEV *, IVUsersOfOneStride *>::const_iterator SI =
      IU.IVUsesByStride.find(Stride);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
    for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; ++UI) {
      // Record the uses.
      LSRUse &LU = getNewUse();
      LU.UserInst = UI->getUser();
      LU.OperandValToReplace = UI->getOperandValToReplace();
      if (isAddressUse(LU.UserInst, LU.OperandValToReplace)) {
        LU.Kind = LSRUse::Address;
        LU.AccessTy = getAccessType(LU.UserInst);
      }
      if (UI->isUseOfPostIncrementedValue())
        LU.PostIncLoop = L;

      const SCEV *S = IU.getCanonicalExpr(*UI);

      // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
      // (N - i == 0), and this allows (N - i) to be the expression that we
      // work with rather than just N or i, so we can consider the register
      // requirements for both N and i at the same time. Limiting this code
      // to equality icmps is not a problem because all interesting loops
      // use equality icmps, thanks to IndVarSimplify.
      if (ICmpInst *CI = dyn_cast<ICmpInst>(LU.UserInst))
        if (CI->isEquality()) {
          // Swap the operands if needed to put the OperandValToReplace on
          // the left, for consistency.
          Value *NV = CI->getOperand(1);
          if (NV == LU.OperandValToReplace) {
            CI->setOperand(1, CI->getOperand(0));
            CI->setOperand(0, NV);
          }

          // x == y  -->  x - y == 0
          const SCEV *N = SE.getSCEV(NV);
          if (N->isLoopInvariant(L)) {
            LU.Kind = LSRUse::ICmpZero;
            S = SE.getMinusSCEV(N, S);
          }

          // -1 and the negations of all interesting strides (except the
          // negation of -1) are now also interesting.
          for (size_t i = 0, e = Factors.size(); i != e; ++i)
            if (Factors[i] != -1)
              Factors.insert(-(uint64_t)Factors[i]);
          Factors.insert(-1);
        }

      // Ok, now enumerate all the different formulae we can find to compute
      // the value for this expression.
      LU.InsertInitialFormula(S, L, SE, DT);
      CountRegisters(LU.Formulae.back(), Uses.size() - 1);
    }
  }

  // If all uses use the same type, don't bother looking for truncation-based
  // reuse.
  if (Types.size() == 1)
    Types.clear();

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  // If there are any uses of registers that we're tracking that have escaped
  // IVUsers' attention, add trivial uses for them, so that the register
  // voting process takes them into consideration.
  GenerateLoopInvariantRegisterUses();

  // Sort the formulae. TODO: This is redundantly sorted below.
  for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), E = Uses.end();
       I != E; ++I) {
    LSRUse &LU = *I;
    std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
                     ComplexitySorter());
  }

  // Ok, we've now collected all the uses and noted their register uses. The
  // next step is to start looking at register reuse possibilities.
  DEBUG(print(dbgs()); dbgs() << '\n');

  // Start by assuming we'll assign each use its own register. This is
  // sometimes called "full" strength reduction, or "superhero" mode.
  // Sometimes this is the best solution, but if there are opportunities for
  // reuse we may find a better solution.
  Score CurScore;
  CurScore.RateInitial(Uses, L, SE);

  // Create a sorted list of registers with those with the most uses appearing
  // earlier in the list. We'll visit them first, as they're the most likely
  // to represent profitable reuse opportunities.
  SmallVector<RegCount, 8> RegOrder;
  for (SmallVectorImpl<const SCEV *>::const_iterator I =
       RegSequence.begin(), E = RegSequence.end(); I != E; ++I)
    RegOrder.push_back(RegCount(*I, RegUses.find(*I)->second));
  std::stable_sort(RegOrder.begin(), RegOrder.end());

  // Visit each register. Determine which ones represent profitable reuse
  // opportunities and remember them.
  // TODO: Extract this code into a function.
  for (SmallVectorImpl<RegCount>::const_iterator I = RegOrder.begin(),
       E = RegOrder.end(); I != E; ++I) {
    const SCEV *Reg = I->Reg;
    const SmallBitVector &Bits = I->Sort.Bits;

    // Registers with only one use don't represent reuse opportunities, so
    // when we get there, we're done.
    if (Bits.count() <= 1) break;

    DEBUG(dbgs() << "Reg " << *Reg << ": ";
          I->Sort.print(dbgs());
          dbgs() << '\n');

    // Determine the total number of registers that will be needed if we make
    // use of the reuse opportunity represented by the current register.
    Score NewScore;
    NewScore.Rate(Reg, Bits, Uses, L, SE);

    // Now decide whether this register's reuse opportunity is an overall win.
    // Currently the decision is heavily skewed for register pressure.
    if (!(NewScore < CurScore))
      continue;

    // Ok, use this opportunity.
    DEBUG(dbgs() << "This candidate has been accepted.\n");
    CurScore = NewScore;

    // Now that we've selected a new reuse opportunity, delete formulae that
    // do not participate in that opportunity.
    for (int j = Bits.find_first(); j != -1; j = Bits.find_next(j)) {
      LSRUse &LU = Uses[j];
      for (unsigned k = 0, h = LU.Formulae.size(); k != h; ++k) {
        Formula &F = LU.Formulae[k];
        if (!F.referencesReg(Reg)) {
          std::swap(LU.Formulae[k], LU.Formulae.back());
          LU.Formulae.pop_back();
          --k; --h;
        }
      }
      // Also re-sort the list to put the formulae with the fewest registers
      // at the front.
      // TODO: Do this earlier, we don't need it each time.
      std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
                       ComplexitySorter());
    }
  }

  // Ok, we've now made all our decisions. The first formula for each use
  // will be used.
  DEBUG(dbgs() << "Concluding, we need "; CurScore.print(dbgs());
        dbgs() << ".\n";
        debug_winner(Uses));

  // Free memory no longer needed.
  RegOrder.clear();
  Factors.clear();
  Types.clear();
  RegUses.clear();
  RegSequence.clear();

  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakVH, 16> DeadInsts;

  SCEVExpander Rewriter(SE);
  Rewriter.disableCanonicalMode();
  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Expand the new value definitions and update the users.
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    // Formulae should be legal.
    DEBUG(for (SmallVectorImpl<Formula>::const_iterator J = I->Formulae.begin(),
               JE = I->Formulae.end(); J != JE; ++J)
            assert(isLegalUse(J->AM, I->Kind, I->AccessTy, TLI) &&
                   "Illegal formula generated!"));

    // Expand the new code and update the user.
    I->Rewrite(L, Rewriter, DeadInsts, SE, DT, P);
    Changed = true;
  }

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}

void LSRInstance::print(raw_ostream &OS) const {
  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << *I;
  }

  for (SmallSetVector<const Type *, 4>::const_iterator
       I = Types.begin(), E = Types.end(); I != E; ++I) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << **I << ')';
  }
  OS << '\n';

  OS << "LSR is examining the following uses, and candidate formulae:\n";
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    const LSRUse &LU = *I;
    OS << " ";
    LU.print(OS);
    OS << '\n';
    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
         JE = LU.Formulae.end(); J != JE; ++J) {
      OS << " ";
      J->print(OS);
      OS << "\n";
    }
  }
}

void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

class LoopStrengthReduce : public LoopPass {
  /// TLI - Keep a pointer to a TargetLowering to consult for determining
  /// transformation profitability.
  const TargetLowering *const TLI;

public:
  static char ID; // Pass ID, replacement for typeid
  explicit LoopStrengthReduce(const TargetLowering *tli = NULL);

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM);
  void getAnalysisUsage(AnalysisUsage &AU) const;
};

}

char LoopStrengthReduce::ID = 0;
static RegisterPass<LoopStrengthReduce>
X("loop-reduce", "Loop Strength Reduction");

Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
  return new LoopStrengthReduce(TLI);
}

LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
  : LoopPass(&ID), TLI(tli) {}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addPreservedID(LoopSimplifyID);
  AU.addPreserved<LoopInfo>();
  AU.addPreserved("domfrontier");

  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<DominatorTree>();
  AU.addPreserved<DominatorTree>();
  AU.addRequired<ScalarEvolution>();
  AU.addPreserved<ScalarEvolution>();
  AU.addRequired<IVUsers>();
  AU.addPreserved<IVUsers>();
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  bool Changed = false;

  // Run the main LSR transformation.
  Changed |= LSRInstance(TLI, L, this).getChanged();

  // At this point, it is worth checking to see if any recurrence PHIs are also
  // dead, so that we can remove them as well.
  Changed |= DeleteDeadPHIs(L->getHeader());

  return Changed;
}