blob: acc66246c309d9e8d0dfe5c7ccd0ea0794633c27 [file] [log] [blame]
Dan Gohman2d1be872009-04-16 03:18:22 +00001//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
Misha Brukmanfd939082005-04-21 23:48:37 +00002//
Nate Begemaneaa13852004-10-18 21:08:22 +00003// The LLVM Compiler Infrastructure
4//
Chris Lattner4ee451d2007-12-29 20:36:04 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Misha Brukmanfd939082005-04-21 23:48:37 +00007//
Nate Begemaneaa13852004-10-18 21:08:22 +00008//===----------------------------------------------------------------------===//
9//
Dan Gohmancec8f9d2009-05-19 20:37:36 +000010// This transformation analyzes and transforms the induction variables (and
11// computations derived from them) into forms suitable for efficient execution
12// on the target.
13//
Nate Begemaneaa13852004-10-18 21:08:22 +000014// This pass performs a strength reduction on array references inside loops that
Dan Gohmancec8f9d2009-05-19 20:37:36 +000015// have as one or more of their components the loop induction variable, it
16// rewrites expressions to take advantage of scaled-index addressing modes
17// available on the target, and it performs a variety of other optimizations
18// related to loop induction variables.
Nate Begemaneaa13852004-10-18 21:08:22 +000019//
Dan Gohmana10756e2010-01-21 02:09:26 +000020// Terminology note: this code has a lot of handling for "post-increment" or
21// "post-inc" users. This is not talking about post-increment addressing modes;
22// it is instead talking about code like this:
23//
24// %i = phi [ 0, %entry ], [ %i.next, %latch ]
25// ...
26// %i.next = add %i, 1
27// %c = icmp eq %i.next, %n
28//
29// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30// it's useful to think about these as the same register, with some uses using
31// the value of the register before the add and some using // it after. In this
32// example, the icmp is a post-increment user, since it uses %i.next, which is
33// the value of the induction variable after the increment. The other common
34// case of post-increment users is users outside the loop.
35//
36// TODO: More sophistication in the way Formulae are generated.
37//
38// TODO: Handle multiple loops at a time.
39//
40// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is
41// that {0,+,1}<%bb> is getting picked first because all 7 uses can
42// use it, and while it's a pretty good solution, it means that LSR
43// doesn't look further to find an even better solution.
44//
45// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
46// instead of a GlobalValue?
47//
48// TODO: When truncation is free, truncate ICmp users' operands to make it a
49// smaller encoding (on x86 at least).
50//
51// TODO: When a negated register is used by an add (such as in a list of
52// multiple base registers, or as the increment expression in an addrec),
53// we may not actually need both reg and (-1 * reg) in registers; the
54// negation can be implemented by using a sub instead of an add. The
55// lack of support for taking this into consideration when making
56// register pressure decisions is partly worked around by the "Special"
57// use kind.
58//
Nate Begemaneaa13852004-10-18 21:08:22 +000059//===----------------------------------------------------------------------===//
60
Chris Lattnerbe3e5212005-08-03 23:30:08 +000061#define DEBUG_TYPE "loop-reduce"
Nate Begemaneaa13852004-10-18 21:08:22 +000062#include "llvm/Transforms/Scalar.h"
63#include "llvm/Constants.h"
64#include "llvm/Instructions.h"
Dan Gohmane5b01be2007-05-04 14:59:09 +000065#include "llvm/IntrinsicInst.h"
Jeff Cohen2f3c9b72005-03-04 04:04:26 +000066#include "llvm/DerivedTypes.h"
Dan Gohman81db61a2009-05-12 02:17:14 +000067#include "llvm/Analysis/IVUsers.h"
Dan Gohmana10756e2010-01-21 02:09:26 +000068#include "llvm/Analysis/Dominators.h"
Devang Patel0f54dcb2007-03-06 21:14:09 +000069#include "llvm/Analysis/LoopPass.h"
Nate Begeman16997482005-07-30 00:15:07 +000070#include "llvm/Analysis/ScalarEvolutionExpander.h"
Chris Lattnere0391be2005-08-12 22:06:11 +000071#include "llvm/Transforms/Utils/BasicBlockUtils.h"
Nate Begemaneaa13852004-10-18 21:08:22 +000072#include "llvm/Transforms/Utils/Local.h"
Dan Gohmana10756e2010-01-21 02:09:26 +000073#include "llvm/ADT/SmallBitVector.h"
74#include "llvm/ADT/SetVector.h"
Nate Begeman16997482005-07-30 00:15:07 +000075#include "llvm/Support/Debug.h"
Dan Gohmanafc36a92009-05-02 18:29:22 +000076#include "llvm/Support/ValueHandle.h"
Daniel Dunbar460f6562009-07-26 09:48:23 +000077#include "llvm/Support/raw_ostream.h"
Evan Chengd277f2c2006-03-13 23:14:23 +000078#include "llvm/Target/TargetLowering.h"
Jeff Cohencfb1d422005-07-30 18:22:27 +000079#include <algorithm>
Nate Begemaneaa13852004-10-18 21:08:22 +000080using namespace llvm;
81
Dan Gohmana10756e2010-01-21 02:09:26 +000082namespace {
Nate Begemaneaa13852004-10-18 21:08:22 +000083
// Comparator that orders strides: constant strides come first, sorted by
// their absolute values; if absolute values are equal, positive strides come
// first. e.g.
//   4, -1, X, 1, 2  ==>  1, -1, 2, 4, X
struct StrideCompare {
  const ScalarEvolution &SE;
  explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}

  /// operator() - Return true if LHS should be ordered before RHS.
  bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
    const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
    const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
    if (LHSC && RHSC) {
      // Compare the two constants at a common bit width, sign-extending the
      // narrower one so negative strides compare correctly.
      unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
                                   SE.getTypeSizeInBits(RHS->getType()));
      APInt LV = LHSC->getValue()->getValue();
      APInt RV = RHSC->getValue()->getValue();
      // NOTE(review): this relies on APInt::sextOrTrunc resizing in place
      // (true in this LLVM vintage); later LLVMs return the result by value,
      // which would silently discard it here -- verify against the APInt API
      // in use.
      LV.sextOrTrunc(BitWidth);
      RV.sextOrTrunc(BitWidth);
      APInt ALV = LV.abs();
      APInt ARV = RV.abs();
      if (ALV == ARV) {
        // Same magnitude, different sign: positive sorts first.
        if (LV != RV)
          return LV.sgt(RV);
      } else {
        // Different magnitudes: smaller absolute value sorts first.
        return ALV.ult(ARV);
      }

      // If it's the same value but different type, sort by bit width so
      // that we emit larger induction variables before smaller
      // ones, letting the smaller be re-written in terms of larger ones.
      return SE.getTypeSizeInBits(RHS->getType()) <
             SE.getTypeSizeInBits(LHS->getType());
    }
    // Exactly one (or neither) is constant: constants sort before
    // non-constants; two non-constants compare as equivalent.
    return LHSC && !RHSC;
  }
};
120
121/// RegSortData - This class holds data which is used to order reuse
122/// candidates.
123class RegSortData {
124public:
125 /// Bits - This represents the set of LSRUses (by index) which reference a
126 /// particular register.
127 SmallBitVector Bits;
128
129 /// MaxComplexity - This represents the greatest complexity (see the comments
130 /// on Formula::getComplexity) seen with a particular register.
131 uint32_t MaxComplexity;
132
133 /// Index - This holds an arbitrary value used as a last-resort tie breaker
134 /// to ensure deterministic behavior.
135 unsigned Index;
136
137 RegSortData() {}
138
139 void print(raw_ostream &OS) const;
140 void dump() const;
141};
142
143}
144
145void RegSortData::print(raw_ostream &OS) const {
146 OS << "[NumUses=" << Bits.count()
147 << ", MaxComplexity=";
148 OS.write_hex(MaxComplexity);
149 OS << ", Index=" << Index << ']';
150}
151
152void RegSortData::dump() const {
153 print(errs()); errs() << '\n';
154}
Dan Gohmanc17e0cf2009-02-20 04:17:46 +0000155
Chris Lattner0e5f4992006-12-19 21:40:18 +0000156namespace {
Dale Johannesendc42f482007-03-20 00:47:50 +0000157
/// RegCount - This is a helper class to sort a given set of registers
/// according to associated RegSortData values.
class RegCount {
public:
  const SCEV *Reg;
  RegSortData Sort;

  RegCount(const SCEV *R, const RegSortData &RSD)
    : Reg(R), Sort(RSD) {}

  // Sort by count. Returning true means the register is preferred.
  //
  // Ordering criteria, most to least significant:
  //   1. number of uses (more is better)
  //   2. addrecs before non-addrecs
  //   3. (addrecs) affine, shallower loop, narrower constant stride
  //   4. complexity of the worst use, then constant start value
  //   5. arbitrary Index tie-breaker for determinism
  bool operator<(const RegCount &Other) const {
    // Sort by the number of unique uses of this register.
    unsigned A = Sort.Bits.count();
    unsigned B = Other.Sort.Bits.count();
    if (A != B) return A > B;

    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
      // AddRecs have higher priority than other things.
      if (!BR) return true;

      // Prefer affine values.
      if (AR->isAffine() != BR->isAffine())
        return AR->isAffine();

      const Loop *AL = AR->getLoop();
      const Loop *BL = BR->getLoop();
      if (AL != BL) {
        unsigned ADepth = AL->getLoopDepth();
        unsigned BDepth = BL->getLoopDepth();
        // Prefer a less deeply nested addrec.
        if (ADepth != BDepth)
          return ADepth < BDepth;

        // Different loops at the same depth; do something arbitrary.
        // Scanning forward from A's header: if B's header appears later in
        // the function's block list, A sorts first. Deterministic within a
        // given function layout.
        BasicBlock *AH = AL->getHeader();
        BasicBlock *BH = BL->getHeader();
        for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
          if (&*I == BH) return true;
        return false;
      }

      // Sort addrecs by stride.
      const SCEV *AStep = AR->getOperand(1);
      const SCEV *BStep = BR->getOperand(1);
      if (AStep != BStep) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep);
          // Constant steps sort before non-constant steps.
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Ignore the sign bit, assuming that striding by a negative value
          // is just as easy as by a positive value.
          // Prefer the addrec with the lesser absolute value stride, as it
          // will allow uses to have simpler addressing modes.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
        // Non-constant A step: fall through to the remaining criteria.
      }

      // Then sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;

      // Then sort them by their start values.
      const SCEV *AStart = AR->getStart();
      const SCEV *BStart = BR->getStart();
      if (AStart != BStart) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart);
          // Constant starts sort before non-constant starts.
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Prefer positive over negative if the absolute values are the same.
          if (AC->getValue()->getValue().abs() ==
              BC->getValue()->getValue().abs())
            return AC->getValue()->getValue().isStrictlyPositive();
          // Prefer the addrec with the lesser absolute value start.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
      }
    } else {
      // AddRecs have higher priority than other things.
      if (isa<SCEVAddRecExpr>(Other.Reg)) return false;
      // Sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;
    }


    // Tie-breaker: the arbitrary index, to ensure a reliable ordering.
    return Sort.Index < Other.Sort.Index;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};
267
268}
269
270void RegCount::print(raw_ostream &OS) const {
271 OS << *Reg << ':';
272 Sort.print(OS);
273}
274
275void RegCount::dump() const {
276 print(errs()); errs() << '\n';
277}
278
279namespace {
280
/// Formula - This class holds information that describes a formula for
/// satisfying a use. It may include broken-out immediates and scaled registers.
struct Formula {
  /// AM - This is used to represent complex addressing, as well as other kinds
  /// of interesting uses.
  TargetLowering::AddrMode AM;

  /// BaseRegs - The list of "base" registers for this use. When this is
  /// non-empty, AM.HasBaseReg should be set to true.
  SmallVector<const SCEV *, 2> BaseRegs;

  /// ScaledReg - The 'scaled' register for this use. This should be non-null
  /// when AM.Scale is not zero.
  const SCEV *ScaledReg;

  // Start with no scaled register; AM is default-constructed.
  Formula() : ScaledReg(0) {}

  unsigned getNumRegs() const;
  uint32_t getComplexity() const;
  const Type *getType() const;

  void InitialMatch(const SCEV *S, Loop *L,
                    ScalarEvolution &SE, DominatorTree &DT);

  /// referencesReg - Test if this formula references the given register,
  /// either as the scaled register or as one of the base registers.
  bool referencesReg(const SCEV *S) const {
    return S == ScaledReg ||
           std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
  }

  // Structural equality over the register lists and the addressing fields.
  // NOTE(review): AM.HasBaseReg is intentionally not compared; presumably it
  // is implied by BaseRegs being non-empty -- confirm against InsertFormula's
  // invariants.
  bool operator==(const Formula &Other) const {
    return BaseRegs == Other.BaseRegs &&
           ScaledReg == Other.ScaledReg &&
           AM.Scale == Other.AM.Scale &&
           AM.BaseOffs == Other.AM.BaseOffs &&
           AM.BaseGV == Other.AM.BaseGV;
  }

  // This sorts at least partially based on host pointer values which are
  // not deterministic, so it is only usable for uniqification.
  bool operator<(const Formula &Other) const {
    if (BaseRegs != Other.BaseRegs)
      return BaseRegs < Other.BaseRegs;
    if (ScaledReg != Other.ScaledReg)
      return ScaledReg < Other.ScaledReg;
    if (AM.Scale != Other.AM.Scale)
      return AM.Scale < Other.AM.Scale;
    if (AM.BaseOffs != Other.AM.BaseOffs)
      return AM.BaseOffs < Other.AM.BaseOffs;
    if (AM.BaseGV != Other.AM.BaseGV)
      return AM.BaseGV < Other.AM.BaseGV;
    return false;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};
338
339}
340
341/// getNumRegs - Return the total number of register operands used by this
Dan Gohman2f524582010-01-21 21:31:09 +0000342/// formula. This does not include register uses implied by non-constant
343/// addrec strides.
Dan Gohmana10756e2010-01-21 02:09:26 +0000344unsigned Formula::getNumRegs() const {
345 return !!ScaledReg + BaseRegs.size();
346}
347
/// getComplexity - Return an oversimplified value indicating the complexity
/// of this formula. This is used as a tie-breaker in choosing register
/// preferences.
///
/// Bit layout (most significant first):
///   bits 17-31: register count (saturated at 2^15-1)
///   bit  16:    more than one base register
///   bits 9-15:  log2(|Scale|)+1, or 0 if Scale is 0
///   bit  8:     Scale is negative
///   bits 1-7:   log2(|BaseOffs|)+1 (or all-ones if a BaseGV is present)
///   bit  0:     BaseOffs is negative
uint32_t Formula::getComplexity() const {
  // Encode the information in a uint32_t so that comparing with operator<
  // will be interesting.
  return
    // Most significant, the number of registers. This saturates because we
    // need the bits, and because beyond a few hundred it doesn't really matter.
    (std::min(getNumRegs(), (1u<<15)-1) << 17) |
    // Having multiple base regs is worse than having a base reg and a scale.
    ((BaseRegs.size() > 1) << 16) |
    // Scale absolute value.
    ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) |
    // Scale sign, which is less significant than the absolute value.
    ((AM.Scale < 0) << 8) |
    // Offset absolute value.
    ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) |
    // If a GV is present, treat it like a maximal offset. Note this is OR'd
    // with the offset field above, so GV+offset saturates the field.
    ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) |
    // Offset sign, which is less significant than the absolute offset.
    ((AM.BaseOffs < 0) << 0);
}
371
372/// getType - Return the type of this formula, if it has one, or null
373/// otherwise. This type is meaningless except for the bit size.
374const Type *Formula::getType() const {
375 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
376 ScaledReg ? ScaledReg->getType() :
377 AM.BaseGV ? AM.BaseGV->getType() :
378 0;
379}
380
381namespace {
382
383/// ComplexitySorter - A predicate which orders Formulae by the number of
384/// registers they contain.
385struct ComplexitySorter {
386 bool operator()(const Formula &LHS, const Formula &RHS) const {
387 unsigned L = LHS.getNumRegs();
388 unsigned R = RHS.getNumRegs();
389 if (L != R) return L < R;
390
391 return LHS.getComplexity() < RHS.getComplexity();
392 }
393};
394
395}
396
/// DoInitialMatch - Recursion helper for InitialMatch. Splits S into a sum of
/// subexpressions that properly dominate the loop header ("Good", i.e.
/// loop-invariant and computable before the loop) and everything else ("Bad").
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE, DominatorTree &DT) {
  // Collect expressions which properly dominate the loop header.
  if (S->properlyDominates(L->getHeader(), &DT)) {
    Good.push_back(S);
    return;
  }

  // Look at add operands: classify each addend independently.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I)
      DoInitialMatch(*I, L, Good, Bad, SE, DT);
    return;
  }

  // Look at addrec operands: split {Start,+,Step} into Start plus the
  // zero-based addrec {0,+,Step}, and classify each piece.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (!AR->getStart()->isZero()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
      DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
                                      AR->getStepRecurrence(SE),
                                      AR->getLoop()),
                     L, Good, Bad, SE, DT);
      return;
    }
  }

  // Handle a multiplication by -1 (negation) if it didn't fold: classify the
  // negated operand, then re-apply the -1 factor to each resulting piece.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
           E = MyGood.end(); I != E; ++I)
        Good.push_back(SE.getMulExpr(NegOne, *I));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
           E = MyBad.end(); I != E; ++I)
        Bad.push_back(SE.getMulExpr(NegOne, *I));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}
452
453/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
454/// attempting to keep all loop-invariant and loop-computable values in a
455/// single base register.
456void Formula::InitialMatch(const SCEV *S, Loop *L,
457 ScalarEvolution &SE, DominatorTree &DT) {
458 SmallVector<const SCEV *, 4> Good;
459 SmallVector<const SCEV *, 4> Bad;
460 DoInitialMatch(S, L, Good, Bad, SE, DT);
461 if (!Good.empty()) {
462 BaseRegs.push_back(SE.getAddExpr(Good));
463 AM.HasBaseReg = true;
464 }
465 if (!Bad.empty()) {
466 BaseRegs.push_back(SE.getAddExpr(Bad));
467 AM.HasBaseReg = true;
468 }
469}
470
471void Formula::print(raw_ostream &OS) const {
472 bool First = true;
473 if (AM.BaseGV) {
474 if (!First) OS << " + "; else First = false;
475 WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
476 }
477 if (AM.BaseOffs != 0) {
478 if (!First) OS << " + "; else First = false;
479 OS << AM.BaseOffs;
480 }
481 for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
482 E = BaseRegs.end(); I != E; ++I) {
483 if (!First) OS << " + "; else First = false;
484 OS << "reg(";
485 OS << **I;
486 OS << ")";
487 }
488 if (AM.Scale != 0) {
489 if (!First) OS << " + "; else First = false;
490 OS << AM.Scale << "*reg(";
491 if (ScaledReg)
492 OS << *ScaledReg;
493 else
494 OS << "<unknown>";
495 OS << ")";
496 }
497}
498
499void Formula::dump() const {
500 print(errs()); errs() << '\n';
501}
502
/// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
/// or null otherwise. If IgnoreSignificantBits is true, expressions like
/// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may
/// overflow, which is useful when the result will be used in a context where
/// the most significant bits are ignored.
static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
                           ScalarEvolution &SE,
                           bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getIntegerSCEV(1, LHS->getType());

  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
  // folding.
  if (RHS->isAllOnesValue())
    return SE.getMulExpr(LHS, RHS);

  // Check for a division of a constant by a constant. Only exact divisions
  // (zero remainder) are representable; otherwise give up.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
    if (!RC)
      return 0;
    if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
      return 0;
    return SE.getConstant(C->getValue()->getValue()
               .sdiv(RC->getValue()->getValue()));
  }

  // Distribute the sdiv over addrec operands: divide both the start and the
  // step; fail if either is not evenly divisible.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
                                IgnoreSignificantBits);
    if (!Start) return 0;
    const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
                               IgnoreSignificantBits);
    if (!Step) return 0;
    return SE.getAddRecExpr(Start, Step, AR->getLoop());
  }

  // Distribute the sdiv over add operands, if all of them divide evenly.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    SmallVector<const SCEV *, 8> Ops;
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I) {
      const SCEV *Op = getSDiv(*I, RHS, SE,
                               IgnoreSignificantBits);
      if (!Op) return 0;
      Ops.push_back(Op);
    }
    return SE.getAddExpr(Ops);
  }

  // Check for a multiply operand that we can pull RHS out of. This is only
  // sound when overflow can be ignored or the multiply has no signed wrap.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
    if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
           I != E; ++I) {
        // Divide RHS out of the first operand that it divides; copy the rest.
        if (!Found)
          if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) {
            Ops.push_back(Q);
            Found = true;
            continue;
          }
        Ops.push_back(*I);
      }
      return Found ? SE.getMulExpr(Ops) : 0;
    }

  // Otherwise we don't know.
  return 0;
}
576
577namespace {
578
/// LSRUse - This class holds the state that LSR keeps for each use in
/// IVUsers, as well as uses invented by LSR itself. It includes information
/// about what kinds of things can be folded into the user, information
/// about the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
  /// FormulaeUniquifier - Set of canonicalized formulae, used by
  /// InsertFormula to reject duplicates.
  SmallSet<Formula, 8> FormulaeUniquifier;

public:
  /// KindType - An enum for a kind of use, indicating what types of
  /// scaled and immediate operands it might support.
  enum KindType {
    Basic,   ///< A normal use, with no folding.
    Special, ///< A special case of basic, allowing -1 scales.
    Address, ///< An address use; folding according to TargetLowering.
    ICmpZero ///< An equality icmp with both operands folded into one.
    // TODO: Add a generic icmp too?
  };

  KindType Kind;
  // AccessTy - The type of the memory access, for Address kind uses.
  const Type *AccessTy;
  // UserInst - The instruction which uses the induction variable.
  Instruction *UserInst;
  // OperandValToReplace - The operand of UserInst to be rewritten.
  Value *OperandValToReplace;

  /// PostIncLoop - If this user is to use the post-incremented value of an
  /// induction variable, this variable is non-null and holds the loop
  /// associated with the induction variable.
  const Loop *PostIncLoop;

  /// Formulae - A list of ways to build a value that can satisfy this user.
  /// After the list is populated, one of these is selected heuristically and
  /// used to formulate a replacement for OperandValToReplace in UserInst.
  SmallVector<Formula, 12> Formulae;

  LSRUse() : Kind(Basic), AccessTy(0),
             UserInst(0), OperandValToReplace(0), PostIncLoop(0) {}

  void InsertInitialFormula(const SCEV *S, Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT);
  void InsertSupplementalFormula(const SCEV *S);

  bool InsertFormula(const Formula &F);

  void Rewrite(Loop *L, Instruction *IVIncInsertPos,
               SCEVExpander &Rewriter,
               SmallVectorImpl<WeakVH> &DeadInsts,
               ScalarEvolution &SE, DominatorTree &DT,
               Pass *P) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  Value *Expand(BasicBlock::iterator IP, Loop *L, Instruction *IVIncInsertPos,
                SCEVExpander &Rewriter,
                SmallVectorImpl<WeakVH> &DeadInsts,
                ScalarEvolution &SE, DominatorTree &DT) const;
};
637
638}
639
640/// ExtractImmediate - If S involves the addition of a constant integer value,
641/// return that integer value, and mutate S to point to a new SCEV with that
642/// value excluded.
643static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
644 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
645 if (C->getValue()->getValue().getMinSignedBits() <= 64) {
646 S = SE.getIntegerSCEV(0, C->getType());
647 return C->getValue()->getSExtValue();
648 }
649 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
650 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
651 int64_t Result = ExtractImmediate(NewOps.front(), SE);
652 S = SE.getAddExpr(NewOps);
653 return Result;
654 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
655 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
656 int64_t Result = ExtractImmediate(NewOps.front(), SE);
657 S = SE.getAddRecExpr(NewOps, AR->getLoop());
658 return Result;
659 }
660 return 0;
661}
662
663/// ExtractSymbol - If S involves the addition of a GlobalValue address,
664/// return that symbol, and mutate S to point to a new SCEV with that
665/// value excluded.
666static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
667 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
668 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
669 S = SE.getIntegerSCEV(0, GV->getType());
670 return GV;
671 }
672 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
673 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
674 GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
675 S = SE.getAddExpr(NewOps);
676 return Result;
677 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
678 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
679 GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
680 S = SE.getAddRecExpr(NewOps, AR->getLoop());
681 return Result;
682 }
683 return 0;
684}
685
686/// isLegalUse - Test whether the use described by AM is "legal", meaning
687/// it can be completely folded into the user instruction at isel time.
688/// This includes address-mode folding and special icmp tricks.
689static bool isLegalUse(const TargetLowering::AddrMode &AM,
690 LSRUse::KindType Kind, const Type *AccessTy,
691 const TargetLowering *TLI) {
692 switch (Kind) {
693 case LSRUse::Address:
694 // If we have low-level target information, ask the target if it can
695 // completely fold this address.
696 if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
697
698 // Otherwise, just guess that reg+reg addressing is legal.
699 return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
700
701 case LSRUse::ICmpZero:
702 // There's not even a target hook for querying whether it would be legal
703 // to fold a GV into an ICmp.
704 if (AM.BaseGV)
705 return false;
706
707 // ICmp only has two operands; don't allow more than two non-trivial parts.
708 if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
709 return false;
710
711 // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale
712 // by putting the scaled register in the other operand of the icmp.
713 if (AM.Scale != 0 && AM.Scale != -1)
714 return false;
715
716 // If we have low-level target information, ask the target if it can
717 // fold an integer immediate on an icmp.
718 if (AM.BaseOffs != 0) {
719 if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
720 return false;
721 }
722
723 return true;
724
725 case LSRUse::Basic:
726 // Only handle single-register values.
727 return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
728
729 case LSRUse::Special:
730 // Only handle -1 scales, or no scale.
731 return AM.Scale == 0 || AM.Scale == -1;
732 }
733
734 return false;
735}
736
737static bool isAlwaysFoldable(const SCEV *S,
738 bool HasBaseReg,
739 LSRUse::KindType Kind, const Type *AccessTy,
740 const TargetLowering *TLI,
741 ScalarEvolution &SE) {
742 // Fast-path: zero is always foldable.
743 if (S->isZero()) return true;
744
745 // Conservatively, create an address with an immediate and a
746 // base and a scale.
747 TargetLowering::AddrMode AM;
748 AM.BaseOffs = ExtractImmediate(S, SE);
749 AM.BaseGV = ExtractSymbol(S, SE);
750 AM.HasBaseReg = HasBaseReg;
751 AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
752
753 // If there's anything else involved, it's not foldable.
754 if (!S->isZero()) return false;
755
756 return isLegalUse(AM, Kind, AccessTy, TLI);
757}
758
759/// InsertFormula - If the given formula has not yet been inserted, add it
760/// to the list, and return true. Return false otherwise.
761bool LSRUse::InsertFormula(const Formula &F) {
762 Formula Copy = F;
763
764 // Sort the base regs, to avoid adding the same solution twice with
765 // the base regs in different orders. This uses host pointer values, but
766 // it doesn't matter since it's only used for uniquifying.
767 std::sort(Copy.BaseRegs.begin(), Copy.BaseRegs.end());
768
769 DEBUG(for (SmallVectorImpl<const SCEV *>::const_iterator I =
770 F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
771 assert(!(*I)->isZero() && "Zero allocated in a base register!");
772 assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
773 "Zero allocated in a scaled register!"));
774
775 if (FormulaeUniquifier.insert(Copy)) {
776 Formulae.push_back(F);
777 return true;
778 }
779
780 return false;
781}
782
783void
784LSRUse::InsertInitialFormula(const SCEV *S, Loop *L,
785 ScalarEvolution &SE, DominatorTree &DT) {
786 Formula F;
787 F.InitialMatch(S, L, SE, DT);
788 bool Inserted = InsertFormula(F);
789 assert(Inserted && "Initial formula already exists!"); (void)Inserted;
790}
791
792void
793LSRUse::InsertSupplementalFormula(const SCEV *S) {
794 Formula F;
795 F.BaseRegs.push_back(S);
796 F.AM.HasBaseReg = true;
797 bool Inserted = InsertFormula(F);
798 assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
799}
800
801/// getImmediateDominator - A handy utility for the specific DominatorTree
802/// query that we need here.
803///
804static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
805 DomTreeNode *Node = DT.getNode(BB);
806 if (!Node) return 0;
807 Node = Node->getIDom();
808 if (!Node) return 0;
809 return Node->getBlock();
810}
811
/// Expand - Emit instructions computing the leading (winning) formula for
/// this use at (or above) the given insertion point, and return the value
/// computed. Instructions made dead in the process are added to DeadInsts.
Value *LSRUse::Expand(BasicBlock::iterator IP,
                      Loop *L, Instruction *IVIncInsertPos,
                      SCEVExpander &Rewriter,
                      SmallVectorImpl<WeakVH> &DeadInsts,
                      ScalarEvolution &SE, DominatorTree &DT) const {
  // Then, collect some instructions which we will remain dominated by when
  // expanding the replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(OperandValToReplace))
    Inputs.push_back(I);
  if (Kind == ICmpZero)
    if (Instruction *I =
          dyn_cast<Instruction>(cast<ICmpInst>(UserInst)->getOperand(1)))
      Inputs.push_back(I);
  // For a post-inc user outside the loop, stay below the latch terminator so
  // the expansion sees the post-incremented value.
  if (PostIncLoop && !L->contains(UserInst))
    Inputs.push_back(L->getLoopLatch()->getTerminator());

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  for (;;) {
    bool AllDominate = true;
    Instruction *BetterPos = 0;
    BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
    if (!IDom) break;
    Instruction *Tentative = IDom->getTerminator();
    for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
         E = Inputs.end(); I != E; ++I) {
      Instruction *Inst = *I;
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // If an input lives in the idom block itself, the best we can do is
      // the position just after the latest such input.
      if (IDom == Inst->getParent() &&
          (!BetterPos || DT.dominates(BetterPos, Inst)))
        BetterPos = next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos;
    else
      IP = Tentative;
  }
  // Don't insert instructions in the middle of a block's PHI-node list.
  while (isa<PHINode>(IP)) ++IP;

  // The first formula in the list is the winner.
  const Formula &F = Formulae.front();

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
  Rewriter.setPostInc(PostIncLoop);

  // This is the type that the user actually needs.
  const Type *OpTy = OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  const Type *Ty = F.getType();
  if (!Ty)
    // No type known; just expand directly to the ultimate type.
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;
  // This is the type to do integer arithmetic in.
  const Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;

  // Expand the BaseRegs portion.
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg))
      if (AR->getLoop() == PostIncLoop) {
        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
        // If the user is inside the loop, insert the code after the increment
        // so that it is dominated by its operand.
        if (L->contains(UserInst))
          IP = IVIncInsertPos;
      }

    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = 0;
  if (F.AM.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
      if (AR->getLoop() == PostIncLoop)
        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));

    if (Kind == ICmpZero) {
      // An interesting way of "folding" with an icmp is to use a negated
      // scale, which we'll implement by inserting it into the other operand
      // of the icmp.
      assert(F.AM.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      // Defer the insertion into the icmp until the end of this function.
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
      const Type *ScaledTy = SE.getEffectiveSCEVType(ScaledS->getType());
      ScaledS = SE.getMulExpr(ScaledS,
                              SE.getSCEV(ConstantInt::get(ScaledTy,
                                                          F.AM.Scale)));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the immediate portions.
  if (F.AM.BaseGV)
    Ops.push_back(SE.getSCEV(F.AM.BaseGV));
  if (F.AM.BaseOffs != 0) {
    if (Kind == ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
      if (!ICmpScaledV)
        ICmpScaledV = ConstantInt::get(IntTy, -F.AM.BaseOffs);
      else {
        // If we already have a scaled-reg value for the icmp's other operand,
        // the offset stays on this side, un-negated.
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, F.AM.BaseOffs);
      }
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
      Ops.push_back(SE.getSCEV(ConstantInt::get(IntTy, F.AM.BaseOffs)));
    }
  }

  // Emit instructions summing all the operands.
  const SCEV *FullS = Ops.empty() ?
                      SE.getIntegerSCEV(0, IntTy) :
                      SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);

  // We're done expanding now, so reset the rewriter.
  Rewriter.setPostInc(0);

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
  // form, update the ICmp's other operand.
  if (Kind == ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(UserInst);
    // The old comparison operand may now be dead.
    DeadInsts.push_back(CI->getOperand(1));
    assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
                           "a scale at the same time!");
    if (F.AM.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
                                                   OpTy, false),
                           ICmpScaledV, OpTy, "tmp", CI);
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      assert(F.AM.Scale == 0 &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      // Fold the negated offset into the icmp's other operand.
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)F.AM.BaseOffs);
      if (C->getType() != OpTy)
        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                          OpTy, false),
                                  C, OpTy);

      CI->setOperand(1, C);
    }
  }

  return FullV;
}
994
/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRUse::Rewrite(Loop *L, Instruction *IVIncInsertPos,
                     SCEVExpander &Rewriter,
                     SmallVectorImpl<WeakVH> &DeadInsts,
                     ScalarEvolution &SE, DominatorTree &DT,
                     Pass *P) const {
  const Type *OpTy = OperandValToReplace->getType();

  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    // Cache expansions per predecessor block so that multiple PHI entries
    // from the same block share one expansion.
    DenseMap<BasicBlock *, Value *> Inserted;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace) {
        BasicBlock *BB = PN->getIncomingBlock(i);

        // If this is a critical edge, split the edge so that we do not insert
        // the code on all predecessor/successor paths. We do this unless this
        // is the canonical backedge for this loop, which complicates post-inc
        // users.
        if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
            !isa<IndirectBrInst>(BB->getTerminator()) &&
            (PN->getParent() != L->getHeader() || !L->contains(BB))) {
          // Split the critical edge.
          BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);

          // If PN is outside of the loop and BB is in the loop, we want to
          // move the block to be immediately before the PHI block, not
          // immediately after BB.
          if (L->contains(BB) && !L->contains(PN))
            NewBB->moveBefore(PN->getParent());

          // Splitting the edge can reduce the number of PHI entries we have.
          e = PN->getNumIncomingValues();
          // The incoming block for this value is now the split block.
          BB = NewBB;
          i = PN->getBasicBlockIndex(BB);
        }

        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
        if (!Pair.second)
          // Reuse the expansion already emitted for this predecessor.
          PN->setIncomingValue(i, Pair.first->second);
        else {
          Value *FullV = Expand(BB->getTerminator(), L, IVIncInsertPos,
                                Rewriter, DeadInsts, SE, DT);

          // If this is reuse-by-noop-cast, insert the noop cast.
          if (FullV->getType() != OpTy)
            FullV =
              CastInst::Create(CastInst::getCastOpcode(FullV, false,
                                                       OpTy, false),
                               FullV, OperandValToReplace->getType(),
                               "tmp", BB->getTerminator());

          PN->setIncomingValue(i, FullV);
          Pair.first->second = FullV;
        }
      }
  } else {
    Value *FullV = Expand(UserInst, L, IVIncInsertPos,
                          Rewriter, DeadInsts, SE, DT);

    // If this is reuse-by-noop-cast, insert the noop cast.
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", UserInst);
      FullV = Cast;
    }

    // Update the user.
    UserInst->replaceUsesOfWith(OperandValToReplace, FullV);
  }

  // The replaced operand may now be dead; queue it for cleanup.
  DeadInsts.push_back(OperandValToReplace);
}
1073
/// print - Print this use's kind, user instruction, replaced operand, and
/// post-increment loop (if any) to OS, for debugging.
void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  switch (Kind) {
  case Basic: OS << "Basic"; break;
  case Special: OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
  case Address:
    OS << "Address of ";
    if (isa<PointerType>(AccessTy))
      OS << "pointer"; // the full pointer type could be really verbose
    else
      OS << *AccessTy;
  }

  OS << ", UserInst=";
  // Store is common and interesting enough to be worth special-casing.
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    OS << "store ";
    WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
  else
    WriteAsOperand(OS, UserInst, /*PrintType=*/false);

  OS << ", OperandValToReplace=";
  WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);

  if (PostIncLoop) {
    OS << ", PostIncLoop=";
    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
  }
}
1106
1107void LSRUse::dump() const {
1108 print(errs()); errs() << '\n';
1109}
1110
namespace {

/// Score - This class is used to measure and compare candidate formulae.
class Score {
  // Cost components, compared lexicographically in this order by operator<.
  unsigned NumRegs;     // Number of distinct registers referenced.
  unsigned NumPhis;     // Number of addrecs on the current loop.
  unsigned NumIVMuls;   // Number of multiplies that vary with the loop.
  unsigned NumBaseAdds; // Adds needed to combine multiple base registers.
  unsigned NumImms;     // Number of non-zero immediates (offset or global).

public:
  Score()
    : NumRegs(0), NumPhis(0), NumIVMuls(0), NumBaseAdds(0), NumImms(0) {}

  void RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                   ScalarEvolution &SE);

  void Rate(const SCEV *Reg, const SmallBitVector &Bits,
            const SmallVector<LSRUse, 16> &Uses, const Loop *L,
            ScalarEvolution &SE);

  unsigned getNumRegs() const { return NumRegs; }

  bool operator<(const Score &Other) const;

  void print_details(raw_ostream &OS, const SCEV *Reg,
                     const SmallPtrSet<const SCEV *, 8> &Regs) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  void RateRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 8> &Regs,
                    const Loop *L);
  void RateFormula(const Formula &F, SmallPtrSet<const SCEV *, 8> &Regs,
                   const Loop *L);

  // Saturate all counts so this score compares worse than any real score.
  void Loose();
};

}
1152
/// RateRegister - Tally up interesting quantities from the given register.
void Score::RateRegister(const SCEV *Reg,
                         SmallPtrSet<const SCEV *, 8> &Regs,
                         const Loop *L) {
  // Only rate each distinct register once; Regs doubles as the set of
  // registers seen so far.
  if (Regs.insert(Reg))
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      // An addrec on the current loop counts toward NumPhis.
      NumPhis += AR->getLoop() == L;

      // Add the step value register, if it needs one.
      if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
        RateRegister(AR->getOperand(1), Regs, L);
    }
}
1166
1167void Score::RateFormula(const Formula &F,
1168 SmallPtrSet<const SCEV *, 8> &Regs,
1169 const Loop *L) {
1170 // Tally up the registers.
1171 if (F.ScaledReg)
1172 RateRegister(F.ScaledReg, Regs, L);
1173 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
1174 E = F.BaseRegs.end(); I != E; ++I) {
1175 const SCEV *BaseReg = *I;
1176 RateRegister(BaseReg, Regs, L);
1177
1178 NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
1179 BaseReg->hasComputableLoopEvolution(L);
1180 }
1181
1182 if (F.BaseRegs.size() > 1)
1183 NumBaseAdds += F.BaseRegs.size() - 1;
1184
1185 // Tally up the non-zero immediates.
1186 if (F.AM.BaseGV || F.AM.BaseOffs != 0)
1187 ++NumImms;
1188}
1189
/// Loose - Set this score to a losing value.
void Score::Loose() {
  // Saturate every count so that this score is never "less than" (better
  // than) any genuinely rated score under operator<.
  NumRegs = ~0u;
  NumPhis = ~0u;
  NumIVMuls = ~0u;
  NumBaseAdds = ~0u;
  NumImms = ~0u;
}
1198
/// RateInitial - Compute a score for the initial "fully reduced" solution.
void Score::RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                        ScalarEvolution &SE) {
  // Rate each use's current leading formula, sharing one register set so
  // registers common to several uses are only counted once.
  SmallPtrSet<const SCEV *, 8> Regs;
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I)
    RateFormula(I->Formulae.front(), Regs, L);
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), 0, Regs));
}
1210
/// Rate - Compute a score for the solution where the reuse associated with
/// putting Reg in a register is selected.
void Score::Rate(const SCEV *Reg, const SmallBitVector &Bits,
                 const SmallVector<LSRUse, 16> &Uses, const Loop *L,
                 ScalarEvolution &SE) {
  SmallPtrSet<const SCEV *, 8> Regs;
  for (size_t i = 0, e = Uses.size(); i != e; ++i) {
    const LSRUse &LU = Uses[i];

    const Formula *BestFormula = 0;
    if (i >= Bits.size() || !Bits.test(i))
      // This use doesn't use the current register. Just go with the current
      // leading candidate formula.
      BestFormula = &LU.Formulae.front();
    else
      // Find the best formula for this use that uses the current register.
      for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
           E = LU.Formulae.end(); I != E; ++I) {
        const Formula &F = *I;
        if (F.referencesReg(Reg) &&
            (!BestFormula || ComplexitySorter()(F, *BestFormula)))
          BestFormula = &F;
      }

    // If we didn't find *any* formulae, because earlier we eliminated some
    // in greedy fashion, skip the current register's reuse opportunity.
    if (!BestFormula) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " wouldn't help any users.\n");
      Loose();
      return;
    }

    // For an in-loop post-inc user, don't allow multiple base registers,
    // because that would require an awkward in-loop add after the increment.
    if (LU.PostIncLoop && LU.PostIncLoop->contains(LU.UserInst) &&
        BestFormula->BaseRegs.size() > 1) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " would require an in-loop post-inc add: ";
            BestFormula->dump());
      Loose();
      return;
    }

    RateFormula(*BestFormula, Regs, L);
  }
  // Registers are shared across uses; count the distinct set once.
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), Reg, Regs));
}
1261
/// operator< - Choose the better score.
/// Lexicographic comparison: fewer registers wins first, then fewer PHIs,
/// IV multiplies, base adds, and finally immediates.
bool Score::operator<(const Score &Other) const {
  if (NumRegs != Other.NumRegs)
    return NumRegs < Other.NumRegs;
  if (NumPhis != Other.NumPhis)
    return NumPhis < Other.NumPhis;
  if (NumIVMuls != Other.NumIVMuls)
    return NumIVMuls < Other.NumIVMuls;
  if (NumBaseAdds != Other.NumBaseAdds)
    return NumBaseAdds < Other.NumBaseAdds;
  return NumImms < Other.NumImms;
}
1274
/// print_details - Print this score along with the register (if any) whose
/// reuse it rates and the set of registers the rated solution would use.
void Score::print_details(raw_ostream &OS,
                          const SCEV *Reg,
                          const SmallPtrSet<const SCEV *, 8> &Regs) const {
  // A null Reg means this score rates the initial solution.
  if (Reg) OS << "Reuse with reg " << *Reg << " would require ";
  else OS << "The initial solution would require ";
  print(OS);
  OS << ". Regs:";
  for (SmallPtrSet<const SCEV *, 8>::const_iterator I = Regs.begin(),
       E = Regs.end(); I != E; ++I)
    OS << ' ' << **I;
  OS << '\n';
}
1287
/// print - Print the non-zero components of this score to OS in a
/// human-readable form.
void Score::print(raw_ostream &OS) const {
  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
  if (NumPhis != 0)
    OS << ", including " << NumPhis << " PHI" << (NumPhis == 1 ? "" : "s");
  if (NumIVMuls != 0)
    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
  if (NumBaseAdds != 0)
    OS << ", plus " << NumBaseAdds << " base add"
       << (NumBaseAdds == 1 ? "" : "s");
  if (NumImms != 0)
    OS << ", plus " << NumImms << " imm" << (NumImms == 1 ? "" : "s");
}
1300
1301void Score::dump() const {
1302 print(errs()); errs() << '\n';
Nate Begemaneaa13852004-10-18 21:08:22 +00001303}
1304
Dan Gohmanf284ce22009-02-18 00:08:39 +00001305/// isAddressUse - Returns true if the specified instruction is using the
Dale Johannesen203af582008-12-05 21:47:27 +00001306/// specified value as an address.
1307static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
1308 bool isAddress = isa<LoadInst>(Inst);
1309 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1310 if (SI->getOperand(1) == OperandVal)
1311 isAddress = true;
1312 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
1313 // Addressing modes can also be folded into prefetches and a variety
1314 // of intrinsics.
1315 switch (II->getIntrinsicID()) {
1316 default: break;
1317 case Intrinsic::prefetch:
1318 case Intrinsic::x86_sse2_loadu_dq:
1319 case Intrinsic::x86_sse2_loadu_pd:
1320 case Intrinsic::x86_sse_loadu_ps:
1321 case Intrinsic::x86_sse_storeu_ps:
1322 case Intrinsic::x86_sse2_storeu_pd:
1323 case Intrinsic::x86_sse2_storeu_dq:
1324 case Intrinsic::x86_sse2_storel_dq:
1325 if (II->getOperand(1) == OperandVal)
1326 isAddress = true;
1327 break;
1328 }
1329 }
1330 return isAddress;
1331}
Chris Lattner0ae33eb2005-10-03 01:04:44 +00001332
Dan Gohman21e77222009-03-09 21:01:17 +00001333/// getAccessType - Return the type of the memory being accessed.
1334static const Type *getAccessType(const Instruction *Inst) {
Dan Gohmana537bf82009-05-18 16:45:28 +00001335 const Type *AccessTy = Inst->getType();
Dan Gohman21e77222009-03-09 21:01:17 +00001336 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
Dan Gohmana537bf82009-05-18 16:45:28 +00001337 AccessTy = SI->getOperand(0)->getType();
Dan Gohman21e77222009-03-09 21:01:17 +00001338 else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
1339 // Addressing modes can also be folded into prefetches and a variety
1340 // of intrinsics.
1341 switch (II->getIntrinsicID()) {
1342 default: break;
1343 case Intrinsic::x86_sse_storeu_ps:
1344 case Intrinsic::x86_sse2_storeu_pd:
1345 case Intrinsic::x86_sse2_storeu_dq:
1346 case Intrinsic::x86_sse2_storel_dq:
Dan Gohmana537bf82009-05-18 16:45:28 +00001347 AccessTy = II->getOperand(1)->getType();
Dan Gohman21e77222009-03-09 21:01:17 +00001348 break;
1349 }
1350 }
Dan Gohmana537bf82009-05-18 16:45:28 +00001351 return AccessTy;
Dan Gohman21e77222009-03-09 21:01:17 +00001352}
1353
Dan Gohmana10756e2010-01-21 02:09:26 +00001354/// DeleteTriviallyDeadInstructions - If any of the instructions is the
1355/// specified set are trivially dead, delete them and see if this makes any of
1356/// their operands subsequently dead.
1357static bool
1358DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
1359 bool Changed = false;
Jim Grosbach56a1f802009-11-17 17:53:56 +00001360
Dan Gohmana10756e2010-01-21 02:09:26 +00001361 while (!DeadInsts.empty()) {
1362 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
Nate Begeman16997482005-07-30 00:15:07 +00001363
Dan Gohmana10756e2010-01-21 02:09:26 +00001364 if (I == 0 || !isInstructionTriviallyDead(I))
Evan Cheng55e641b2008-03-19 22:02:26 +00001365 continue;
Jim Grosbach56a1f802009-11-17 17:53:56 +00001366
Dan Gohmana10756e2010-01-21 02:09:26 +00001367 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
1368 if (Instruction *U = dyn_cast<Instruction>(*OI)) {
1369 *OI = 0;
1370 if (U->use_empty())
1371 DeadInsts.push_back(U);
Dan Gohman2acc7602007-05-03 23:20:33 +00001372 }
Dan Gohman02e4fa72007-10-22 20:40:42 +00001373
Dan Gohmana10756e2010-01-21 02:09:26 +00001374 I->eraseFromParent();
Dan Gohmanafc36a92009-05-02 18:29:22 +00001375 Changed = true;
Evan Chengcdf43b12007-10-25 09:11:16 +00001376 }
1377
Dan Gohmana10756e2010-01-21 02:09:26 +00001378 return Changed;
Evan Chengcdf43b12007-10-25 09:11:16 +00001379}
1380
namespace {

/// LSRInstance - This class holds state for the main loop strength
/// reduction logic.
class LSRInstance {
  IVUsers &IU;                    // Induction variable users for this loop.
  ScalarEvolution &SE;            // SCEV analysis.
  DominatorTree &DT;              // Dominator information.
  const TargetLowering *const TLI; // Target info; may be null.
  Loop *const L;                  // The loop being transformed.
  bool Changed;                   // Whether the IR has been modified.

  /// IVIncInsertPos - This is the insert position at which the current loop's
  /// induction variable increment should be placed. In simple loops, this is
  /// the latch block's terminator. But in more complicated cases, this is
  /// a position which will dominate all the in-loop post-increment users.
  Instruction *IVIncInsertPos;

  /// CurrentArbitraryRegIndex - To ensure a deterministic ordering, assign an
  /// arbitrary index value to each register as a sort tie breaker.
  unsigned CurrentArbitraryRegIndex;

  /// MaxNumRegs - To help prune the search for solutions, identify the number
  /// of registers needed by the initial solution. No formula should require
  /// more than this.
  unsigned MaxNumRegs;

  /// Factors - Interesting factors between use strides.
  SmallSetVector<int64_t, 4> Factors;

  /// Types - Interesting use types, to facilitate truncation reuse.
  SmallSetVector<const Type *, 4> Types;

  /// Uses - The list of interesting uses.
  SmallVector<LSRUse, 16> Uses;

  // TODO: Reorganize these data structures.
  typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
  RegUsesTy RegUses;
  SmallVector<const SCEV *, 16> RegSequence;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                         const SCEV* &CondStride);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
  bool StrideMightBeShared(const SCEV* Stride);
  bool OptimizeLoopTermCond();

  // Append a fresh, default-constructed use and return it.
  LSRUse &getNewUse() {
    Uses.push_back(LSRUse());
    return Uses.back();
  }

  void CountRegister(const SCEV *Reg, uint32_t Complexity, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);

  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  // Formula-generation helpers: each derives additional candidate formulae
  // for use LU (at index LUIdx) from the given starting formula.
  void GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
                                   Formula Base);
  void GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
                                   Formula Base);
  void GenerateFormulaeFromReplacedBaseReg(LSRUse &LU,
                                           unsigned LUIdx,
                                           const Formula &Base, unsigned i,
                                           const SmallVectorImpl<const SCEV *>
                                             &AddOps);
  void GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
                                  Formula Base);
  void GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
                                Formula Base);
  void GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
                           Formula Base);
  void GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
                             Formula Base);

  void GenerateConstantOffsetReuse();

  void GenerateAllReuseFormulae();

  void GenerateLoopInvariantRegisterUses();

public:
  LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);

  bool getChanged() const { return Changed; }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}
1473
/// OptimizeShadowIV - If IV is used in an int-to-float cast
/// inside the loop then try to eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
  // Only loops with a computable trip count are considered.
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (size_t StrideIdx = 0, e = IU.StrideOrder.size();
       StrideIdx != e; ++StrideIdx) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
    // Only constant-stride IVs are candidates.
    if (!isa<SCEVConstant>(SI->first))
      continue;

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
           E = SI->second->Users.end(); UI != E; /* empty */) {
      // Advance UI before any transformation, since the candidate use may
      // be erased below.
      ilist<IVStrideUse>::iterator CandidateUI = UI;
      ++UI;
      Instruction *ShadowUse = CandidateUI->getUser();
      const Type *DestTy = NULL;

      /* If shadow use is a int->float cast then insert a second IV
         to eliminate this cast.

           for (unsigned i = 0; i < n; ++i)
             foo((double)i);

         is transformed into

           double d = 0.0;
           for (unsigned i = 0; i < n; ++i, ++d)
             foo(d);
      */
      if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
        DestTy = UCast->getDestTy();
      else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
        DestTy = SCast->getDestTy();
      if (!DestTy) continue;

      if (TLI) {
        // If target does not support DestTy natively then do not apply
        // this transformation.
        EVT DVT = TLI->getValueType(DestTy);
        if (!TLI->isTypeLegal(DVT)) continue;
      }

      // The cast's operand must be a simple two-entry PHI (the IV itself).
      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
      if (!PH) continue;
      if (PH->getNumIncomingValues() != 2) continue;

      // Bail out if the integer IV can take values the FP type's mantissa
      // cannot represent exactly.
      const Type *SrcTy = PH->getType();
      int Mantissa = DestTy->getFPMantissaWidth();
      if (Mantissa == -1) continue;
      if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
        continue;

      // Identify which PHI operand comes from the preheader (Entry) and
      // which from the backedge (Latch).
      unsigned Entry, Latch;
      if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
        Entry = 0;
        Latch = 1;
      } else {
        Entry = 1;
        Latch = 0;
      }

      ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
      if (!Init) continue;
      Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());

      BinaryOperator *Incr =
        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
      if (!Incr) continue;
      if (Incr->getOpcode() != Instruction::Add
          && Incr->getOpcode() != Instruction::Sub)
        continue;

      /* Initialize new IV, double d = 0.0 in above example. */
      ConstantInt *C = NULL;
      if (Incr->getOperand(0) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(1));
      else if (Incr->getOperand(1) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(0));
      else
        continue;

      if (!C) continue;

      // Ignore negative constants, as the code below doesn't handle them
      // correctly. TODO: Remove this restriction.
      if (!C->getValue().isStrictlyPositive()) continue;

      /* Add new PHINode. */
      PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);

      /* create new increment. '++d' in above example. */
      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
      BinaryOperator *NewIncr =
        BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
                                 Instruction::FAdd : Instruction::FSub,
                               NewPH, CFP, "IV.S.next.", Incr);

      NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
      NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));

      /* Remove cast operation */
      ShadowUse->replaceAllUsesWith(NewPH);
      ShadowUse->eraseFromParent();
      break;
    }
  }
}
1586
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
                                    IVStrideUse *&CondUse,
                                    const SCEV* &CondStride) {
  // Scan every stride's user list for an IV use whose user is Cond; stop as
  // soon as one is found (the "!CondUse" loop condition).
  for (unsigned StrideIdx = 0, e = IU.StrideOrder.size();
       StrideIdx != e && !CondUse; ++StrideIdx) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; ++UI)
      if (UI->getUser() == Cond) {
        // NOTE: we could handle setcc instructions with multiple uses here, but
        // InstCombine does it as well for simple uses, it's not clear that it
        // occurs enough in real life to handle.
        CondUse = UI;
        CondStride = SI->first;
        return true;
      }
  }
  return false;
}
1612
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
//
///   if (n > 0) {
///     i = 0;
///     do {
///       p[i] = 0.0;
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     p[i] = 0.0;
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
/// Returns the (possibly new) condition instruction; CondUse is updated to
/// refer to it if a rewrite happens.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  // The trip-count select must feed the comparison and have no other users,
  // so that deleting it below is safe.
  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType());

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);

  // Check for a max calculation that matches the pattern.
  if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
    return Cond;
  const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
  // The select must compute exactly the max that SCEV derived.
  if (Max != SE.getSCEV(Sel)) return Cond;

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  // SCEV canonicalizes constants to the left, so the '1' guard is operand 0.
  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
  if (!MaxLHS || MaxLHS != One) return Cond;

  // Check the relevant induction variable for conformance to
  // the pattern: an affine addrec starting at 1 with step 1 (i.e. the
  // post-increment counter of a loop counted from 0).
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = 0;
  if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  if (!NewRHS) return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  CmpInst::Predicate Pred =
    isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond =
    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  // The select's compare may now be dead too; clean it up if so.
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}
1737
/// StrideMightBeShared - Return true if a register with the given (constant)
/// stride might be shared with users of another stride that is an exact
/// multiple of it, by way of a legal scaled addressing mode.
bool LSRInstance::StrideMightBeShared(const SCEV* Stride) {
  int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
  for (unsigned i = 0, e = IU.StrideOrder.size(); i != e; ++i) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[i]);
    const SCEV *Share = SI->first;
    // Only other constant strides can be compared numerically.
    if (!isa<SCEVConstant>(SI->first) || Share == Stride)
      continue;
    int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
    if (SSInt == SInt)
      return true; // This can definitely be reused.
    // Sharing requires the other stride to be an exact (larger) multiple.
    if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
      continue;
    int64_t Scale = SSInt / SInt;

    // This AM will be used for conservative queries. At this point in the
    // process we don't know which users will have a base reg, immediate,
    // etc., so we conservatively assume that it may not, making more
    // strides valid, thus erring on the side of assuming that there
    // might be sharing.
    TargetLowering::AddrMode AM;
    AM.Scale = Scale;

    // Any pre-inc iv use?
    IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[Share];
    for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
         E = StrideUses.Users.end(); I != E; ++I) {
      bool isAddress = isAddressUse(I->getUser(), I->getOperandValToReplace());
      // A pre-increment use of the other stride that could legally fold the
      // scale means the two strides might end up sharing a register.
      if (!I->isUseOfPostIncrementedValue() &&
          isLegalUse(AM, isAddress ? LSRUse::Address : LSRUse::Basic,
                     isAddress ? getAccessType(I->getUser()) : 0,
                     TLI))
        return true;
    }
  }
  return false;
}
Nate Begeman16997482005-07-30 00:15:07 +00001775
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
///
/// Also computes IVIncInsertPos, a position for the IV increment that
/// dominates all the post-incremented comparisons created here as well as
/// the latch edge. Returns true if any change was made.
bool
LSRInstance::OptimizeLoopTermCond() {
  // Conditions successfully converted to use the post-inc IV; used below to
  // choose IVIncInsertPos.
  SmallPtrSet<Instruction *, 4> PostIncs;

  BasicBlock *LatchBlock = L->getLoopLatch();
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BasicBlock *ExitingBlock = ExitingBlocks[i];

    // Get the terminating condition for the loop if possible.  If we
    // can, we want to change it to use a post-incremented version of its
    // induction variable, to allow coalescing the live ranges for the IV into
    // one register value.

    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
    if (!TermBr)
      continue;
    // FIXME: Overly conservative, termination condition could be an 'or' etc..
    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
      continue;

    // Search IVUsesByStride to find Cond's IVUse if there is one.
    IVStrideUse *CondUse = 0;
    const SCEV *CondStride = 0;
    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
    if (!FindIVUserForCond(Cond, CondUse, CondStride))
      continue;

    // If the trip count is computed in terms of a max (due to ScalarEvolution
    // being unable to find a sufficient guard, for example), change the loop
    // comparison to use SLT or ULT instead of NE.
    // One consequence of doing this now is that it disrupts the count-down
    // optimization. That's not always a bad thing though, because in such
    // cases it may still be worthwhile to avoid a max.
    Cond = OptimizeMax(Cond, CondUse);

    // If this exiting block is the latch block, and the condition has only
    // one use inside the loop (the branch), use the post-incremented value
    // of the induction variable
    if (ExitingBlock != LatchBlock) {
      // If this exiting block dominates the latch block, it may also use
      // the post-inc value if it won't be shared with other uses.
      // Check for dominance.
      if (!DT.dominates(ExitingBlock, LatchBlock))
        continue;
      // Check for sharing within the same stride: any other pre-inc user of
      // this stride would force the pre-inc value to stay live anyway.
      bool SameStrideSharing = false;
      IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[CondStride];
      for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
           E = StrideUses.Users.end(); I != E; ++I) {
        if (I->getUser() == Cond)
          continue;
        if (!I->isUseOfPostIncrementedValue()) {
          SameStrideSharing = true;
          break;
        }
      }
      if (SameStrideSharing)
        continue;
      // Check for sharing from a different stride.
      if (isa<SCEVConstant>(CondStride) && StrideMightBeShared(CondStride))
        continue;
    }
    // If Cond has other users inside the loop besides the branch, converting
    // it would force both pre- and post-inc values to be live.
    if (!Cond->hasOneUse()) {
      bool HasOneUseInLoop = true;
      for (Value::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
           UI != UE; ++UI) {
        Instruction *U = cast<Instruction>(*UI);
        if (U == TermBr)
          continue;
        if (L->contains(U)) {
          HasOneUseInLoop = false;
          break;
        }
      }
      if (!HasOneUseInLoop)
        continue;
    }

    DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
          << *Cond << '\n');

    // It's possible for the setcc instruction to be anywhere in the loop, and
    // possible for it to have multiple users.  If it is not immediately before
    // the exiting block branch, move it.
    if (&*++BasicBlock::iterator(Cond) != TermBr) {
      if (Cond->hasOneUse()) { // Condition has a single use, just move it.
        Cond->moveBefore(TermBr);
      } else {
        // Otherwise, clone the terminating condition and insert into the
        // loopend.
        ICmpInst *OldCond = Cond;
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");
        ExitingBlock->getInstList().insert(TermBr, Cond);

        // Clone the IVUse, as the old use still exists!
        IU.IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
                                          CondUse->getOperandValToReplace());
        CondUse = &IU.IVUsesByStride[CondStride]->Users.back();
        TermBr->replaceUsesOfWith(OldCond, Cond);
      }
    }

    // If we get to here, we know that we can transform the setcc instruction to
    // use the post-incremented version of the IV, allowing us to coalesce the
    // live ranges for the IV correctly.
    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), CondStride));
    CondUse->setIsUseOfPostIncrementedValue(true);
    Changed = true;

    PostIncs.insert(Cond);
  }

  // Determine an insertion point for the loop induction variable increment. It
  // must dominate all the post-inc comparisons we just set up, and it must
  // dominate the loop latch edge.
  IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (SmallPtrSet<Instruction *, 4>::iterator I = PostIncs.begin(),
       E = PostIncs.end(); I != E; ++I) {
    BasicBlock *BB =
      DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
                                    (*I)->getParent());
    if (BB == (*I)->getParent())
      IVIncInsertPos = *I;
    else if (BB != IVIncInsertPos->getParent())
      IVIncInsertPos = BB->getTerminator();
  }

  return Changed;
}
1911
/// CountRegister - Note the given register, recording its complexity and
/// which use (LUIdx) references it. First-time registers are assigned an
/// arbitrary ordering index and appended to RegSequence.
void LSRInstance::CountRegister(const SCEV *Reg, uint32_t Complexity,
                                size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUses.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &BV = Pair.first->second;
  if (Pair.second) {
    // First time we've seen this register.
    BV.Index = CurrentArbitraryRegIndex++;
    BV.MaxComplexity = Complexity;
    RegSequence.push_back(Reg);
  } else {
    // Already known; just keep the maximum complexity seen so far.
    BV.MaxComplexity = std::max(BV.MaxComplexity, Complexity);
  }
  // Record that use LUIdx references this register.
  BV.Bits.resize(std::max(BV.Bits.size(), LUIdx + 1));
  BV.Bits.set(LUIdx);
}
1928
1929/// CountRegisters - Note which registers are used by the given formula,
1930/// updating RegUses.
1931void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
1932 uint32_t Complexity = F.getComplexity();
1933 if (F.ScaledReg)
1934 CountRegister(F.ScaledReg, Complexity, LUIdx);
1935 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
1936 E = F.BaseRegs.end(); I != E; ++I)
1937 CountRegister(*I, Complexity, LUIdx);
1938}
1939
Dan Gohman8b0ade32010-01-21 22:42:49 +00001940/// InsertFormula - If the given formula has not yet been inserted, add it
1941/// to the list, and return true. Return false otherwise.
1942bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
1943 // If a formula by itself would require more registers than the base solution,
1944 // discard it and stop searching from it, as it won't be profitable. This is
1945 // actually more permissive than it could be, because it doesn't include
1946 // registers used by non-constant strides in F.
1947 if (F.getNumRegs() > MaxNumRegs)
1948 return false;
1949
1950 if (!LU.InsertFormula(F))
1951 return false;
1952
1953 CountRegisters(LU.Formulae.back(), LUIdx);
1954 return true;
1955}
1956
Dan Gohmana10756e2010-01-21 02:09:26 +00001957/// GenerateSymbolicOffsetReuse - Generate reuse formulae using symbolic
1958/// offsets.
1959void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
1960 Formula Base) {
1961 // We can't add a symbolic offset if the address already contains one.
1962 if (Base.AM.BaseGV) return;
1963
1964 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
1965 const SCEV *G = Base.BaseRegs[i];
1966 GlobalValue *GV = ExtractSymbol(G, SE);
1967 if (G->isZero())
1968 continue;
1969 Formula F = Base;
1970 F.AM.BaseGV = GV;
1971 if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
1972 continue;
1973 F.BaseRegs[i] = G;
Dan Gohman8b0ade32010-01-21 22:42:49 +00001974 (void)InsertFormula(LU, LUIdx, F);
Evan Cheng586f69a2009-11-12 07:35:05 +00001975 }
Evan Cheng586f69a2009-11-12 07:35:05 +00001976}
1977
/// GenerateICmpZeroScaledReuse - For ICmpZero, check to see if we can scale up
/// the comparison. For example, x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
                                              Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  const Type *IntTy = Base.getType();
  if (!IntTy) return;
  // The overflow checks below are done in 64-bit arithmetic; wider types
  // can't be handled.
  if (SE.getTypeSizeInBits(IntTy) > 64) return;
  IntTy = SE.getEffectiveSCEVType(IntTy);

  assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;
    Formula F = Base;

    // Check that the multiplication doesn't overflow.
    // (Multiply in unsigned to avoid signed-overflow UB, then verify by
    // dividing back.)
    F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs * Factor;
    if ((int64_t)F.AM.BaseOffs / Factor != F.AM.BaseOffs)
      continue;

    // Check that this scale is legal.
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
      continue;

    const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));

    // Check that multiplying with each base register doesn't overflow.
    // Verified by dividing the product back and comparing to the original.
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // If we make it here and it's legal, add it.
    (void)InsertFormula(LU, LUIdx, F);
  next:;
  }
}
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002028
/// GenerateFormulaeFromReplacedBaseReg - If removing base register with
/// index i from the BaseRegs list and adding the registers in AddOps
/// to the address forms an interesting formula, pursue it.
void
LSRInstance::GenerateFormulaeFromReplacedBaseReg(
                                             LSRUse &LU,
                                             unsigned LUIdx,
                                             const Formula &Base, unsigned i,
                                             const SmallVectorImpl<const SCEV *>
                                               &AddOps) {
  if (AddOps.empty()) return;

  Formula F = Base;
  // Remove BaseRegs[i] without shifting the rest: swap it to the back
  // and pop. Ordering of the remaining registers is not significant.
  std::swap(F.BaseRegs[i], F.BaseRegs.back());
  F.BaseRegs.pop_back();
  // Append the replacement registers.
  for (SmallVectorImpl<const SCEV *>::const_iterator I = AddOps.begin(),
       E = AddOps.end(); I != E; ++I)
    F.BaseRegs.push_back(*I);
  F.AM.HasBaseReg = !F.BaseRegs.empty();
  if (InsertFormula(LU, LUIdx, F))
    // If that formula hadn't been seen before, recurse to find more like it.
    // Note the mutual recursion with GenerateReassociationReuse; it
    // terminates because InsertFormula rejects duplicates.
    GenerateReassociationReuse(LU, LUIdx, LU.Formulae.back());
}
Evan Cheng81ebdcf2009-11-10 21:14:05 +00002052
/// GenerateReassociationReuse - Split out subexpressions from adds and
/// the bases of addrecs.
void LSRInstance::GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
                                             Formula Base) {
  SmallVector<const SCEV *, 8> AddOps;
  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *BaseReg = Base.BaseRegs[i];
    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BaseReg)) {
      // Case 1: the register is a plain add. Try hoisting each operand J
      // out into its own register, with the rest re-combined into one.
      for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
           J != JE; ++J) {
        // Don't pull a constant into a register if the constant could be
        // folded into an immediate field.
        if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE)) continue;
        SmallVector<const SCEV *, 8> InnerAddOps;
        for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
          if (K != J)
            InnerAddOps.push_back(*K);
        // Splitting a 2-operand add both ways is redundant. Pruning this
        // now saves compile time.
        if (InnerAddOps.size() < 2 && next(J) == JE)
          continue;
        AddOps.push_back(*J);
        const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
        AddOps.push_back(InnerAdd);
        GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
        AddOps.clear();
      }
    } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BaseReg)) {
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(AR->getStart())) {
        // Case 2: the register is an addrec whose start is an add. Try
        // hoisting each start operand out, leaving an addrec whose start
        // is the remaining sum.
        for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
             J != JE; ++J) {
          // Don't pull a constant into a register if the constant could be
          // folded into an immediate field.
          if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE))
            continue;
          SmallVector<const SCEV *, 8> InnerAddOps;
          for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
            if (K != J)
              InnerAddOps.push_back(*K);
          AddOps.push_back(*J);
          const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
          AddOps.push_back(SE.getAddRecExpr(InnerAdd,
                                            AR->getStepRecurrence(SE),
                                            AR->getLoop()));
          GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
          AddOps.clear();
        }
      } else if (!isAlwaysFoldable(AR->getStart(), Base.BaseRegs.size() > 1,
                                   LU.Kind, LU.AccessTy,
                                   TLI, SE)) {
        // Case 3: a non-add addrec start. Split it into the start value
        // plus a zero-based addrec with the same step.
        AddOps.push_back(AR->getStart());
        AddOps.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0,
                                                           BaseReg->getType()),
                                          AR->getStepRecurrence(SE),
                                          AR->getLoop()));
        GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
        AddOps.clear();
      }
    }
  }
}
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002114
/// GenerateCombinationReuse - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
void LSRInstance::GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
                                           Formula Base) {
  // This method is only interesting on a plurality of registers.
  if (Base.BaseRegs.size() <= 1) return;

  Formula F = Base;
  F.BaseRegs.clear();
  SmallVector<const SCEV *, 4> Ops;
  // Partition the base registers: loop-invariant ones (which dominate the
  // header and have no evolution in this loop) go into Ops to be summed;
  // the rest stay as separate registers.
  for (SmallVectorImpl<const SCEV *>::const_iterator
       I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
    const SCEV *BaseReg = *I;
    if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
        !BaseReg->hasComputableLoopEvolution(L))
      Ops.push_back(BaseReg);
    else
      F.BaseRegs.push_back(BaseReg);
  }
  // Only worthwhile if the sum actually merges two or more registers.
  if (Ops.size() > 1) {
    F.BaseRegs.push_back(SE.getAddExpr(Ops));
    (void)InsertFormula(LU, LUIdx, F);
  }
}
2139
/// GenerateScaledReuse - Generate stride factor reuse formulae by making
/// use of scaled-offset address modes, for example.
void LSRInstance::GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
                                      Formula Base) {
  // Determine the integer type for the base formula.
  const Type *IntTy = Base.getType();
  if (!IntTy) return;
  IntTy = SE.getEffectiveSCEVType(IntTy);

  // Check each interesting stride.
  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;

    // If this Formula already has a scaled register, we can't add another one.
    if (Base.AM.Scale != 0)
      continue;
    Formula F = Base;
    F.AM.Scale = Factor;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI)) {
      // As a special-case, handle special out-of-loop Basic users specially.
      // TODO: Reconsider this special case.
      // NOTE: this mutates LU.Kind as a side effect, relaxing the use's
      // legality constraints for all subsequent queries.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(F.AM, LSRUse::Special, LU.AccessTy, TLI) &&
          !L->contains(LU.UserInst))
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For each addrec base reg, apply the scale, if possible.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
      if (const SCEVAddRecExpr *AR =
            dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
        const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
          // TODO: This could be optimized to avoid all the copying.
          Formula NewF = F;
          NewF.ScaledReg = Quotient;
          // The scaled register replaces this base register.
          std::swap(NewF.BaseRegs[i], NewF.BaseRegs.back());
          NewF.BaseRegs.pop_back();
          NewF.AM.HasBaseReg = !NewF.BaseRegs.empty();
          (void)InsertFormula(LU, LUIdx, NewF);
        }
      }
  }
}
2189
/// GenerateTruncateReuse - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
                                        Formula Base) {
  // This requires TargetLowering to tell us which truncates are free.
  if (!TLI) return;

  // Don't attempt to truncate symbolic values.
  if (Base.AM.BaseGV) return;

  // Determine the integer type for the base formula.
  const Type *DstTy = Base.getType();
  if (!DstTy) return;
  DstTy = SE.getEffectiveSCEVType(DstTy);

  // For each other IV type in the loop whose truncation to this formula's
  // type is free, build a variant of the formula in that wider type.
  for (SmallSetVector<const Type *, 4>::const_iterator
       I = Types.begin(), E = Types.end(); I != E; ++I) {
    const Type *SrcTy = *I;
    if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
      Formula F = Base;
      // Any-extend every register to the source type.
      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
      for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
           JE = F.BaseRegs.end(); J != JE; ++J)
        *J = SE.getAnyExtendExpr(*J, SrcTy);
      (void)InsertFormula(LU, LUIdx, F);
    }
  }
}
2217
2218namespace {
2219
2220/// WorkItem - Helper class for GenerateConstantOffsetReuse. It's used to
2221/// defer modifications so that the search phase doesn't have to worry about
2222/// the data structures moving underneath it.
2223struct WorkItem {
2224 LSRUse *LU;
2225 size_t LUIdx;
2226 int64_t Imm;
2227 const SCEV *OrigReg;
2228
2229 WorkItem(LSRUse *U, size_t LI, int64_t I, const SCEV *R)
2230 : LU(U), LUIdx(LI), Imm(I), OrigReg(R) {}
2231
2232 void print(raw_ostream &OS) const;
2233 void dump() const;
2234};
2235
2236void WorkItem::print(raw_ostream &OS) const {
2237 OS << "in use ";
2238 LU->print(OS);
2239 OS << " (at index " << LUIdx << "), add offset " << Imm
2240 << " and compensate by adjusting refences to " << *OrigReg << "\n";
2241}
2242
2243void WorkItem::dump() const {
2244 print(errs()); errs() << '\n';
2245}
2246
2247}
2248
2249/// GenerateConstantOffsetReuse - Look for registers which are a constant
2250/// distance apart and try to form reuse opportunities between them.
2251void LSRInstance::GenerateConstantOffsetReuse() {
2252 // Group the registers by their value without any added constant offset.
2253 typedef std::map<int64_t, const SCEV *> ImmMapTy;
2254 typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
2255 RegMapTy Map;
2256 SmallVector<const SCEV *, 8> Sequence;
2257 for (SmallVectorImpl<const SCEV *>::iterator I = RegSequence.begin(),
2258 E = RegSequence.end(); I != E; ++I) {
2259 const SCEV *Reg = *I;
2260 int64_t Imm = ExtractImmediate(Reg, SE);
2261 std::pair<RegMapTy::iterator, bool> Pair =
2262 Map.insert(std::make_pair(Reg, ImmMapTy()));
2263 if (Pair.second)
2264 Sequence.push_back(Reg);
2265 Pair.first->second.insert(std::make_pair(Imm, *I));
2266 }
2267
2268 // Insert an artificial expression at offset 0 (if there isn't one already),
2269 // as this may lead to more reuse opportunities.
2270 for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
2271 E = Sequence.end(); I != E; ++I)
2272 Map.find(*I)->second.insert(ImmMapTy::value_type(0, 0));
2273
2274 // Now examine each set of registers with the same base value. Build up
2275 // a list of work to do and do the work in a separate step so that we're
2276 // not adding formulae and register counts while we're searching.
2277 SmallVector<WorkItem, 32> WorkItems;
2278 for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
2279 E = Sequence.end(); I != E; ++I) {
2280 const SCEV *Reg = *I;
2281 const ImmMapTy &Imms = Map.find(Reg)->second;
2282 // Examine each offset.
2283 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
2284 J != JE; ++J) {
2285 const SCEV *OrigReg = J->second;
2286 // Skip the artifical register at offset 0.
2287 if (!OrigReg) continue;
2288
2289 int64_t JImm = J->first;
2290 const SmallBitVector &Bits = RegUses.find(OrigReg)->second.Bits;
2291
2292 // Examine each other offset associated with the same register. This is
2293 // quadradic in the number of registers with the same base, but it's
2294 // uncommon for this to be a large number.
2295 for (ImmMapTy::const_iterator M = Imms.begin(); M != JE; ++M) {
2296 if (M == J) continue;
2297
2298 // Compute the difference between the two.
2299 int64_t Imm = (uint64_t)JImm - M->first;
2300 for (int LUIdx = Bits.find_first(); LUIdx != -1;
2301 LUIdx = Bits.find_next(LUIdx))
2302 // Make a memo of this use, offset, and register tuple.
2303 WorkItems.push_back(WorkItem(&Uses[LUIdx], LUIdx, Imm, OrigReg));
Evan Cheng586f69a2009-11-12 07:35:05 +00002304 }
2305 }
2306 }
2307
Dan Gohmana10756e2010-01-21 02:09:26 +00002308 // Now iterate through the worklist and add new formulae.
2309 for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
2310 E = WorkItems.end(); I != E; ++I) {
2311 const WorkItem &WI = *I;
2312 LSRUse &LU = *WI.LU;
2313 size_t LUIdx = WI.LUIdx;
2314 int64_t Imm = WI.Imm;
2315 const SCEV *OrigReg = WI.OrigReg;
2316
2317 const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
2318 const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy,
2319 -(uint64_t)Imm));
2320
2321 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
2322 Formula F = LU.Formulae[L];
2323 // Use the immediate in the scaled register.
2324 if (F.ScaledReg == OrigReg) {
2325 int64_t Offs = (uint64_t)F.AM.BaseOffs +
2326 Imm * (uint64_t)F.AM.Scale;
2327 // Don't create 50 + reg(-50).
2328 if (F.referencesReg(SE.getSCEV(
2329 ConstantInt::get(IntTy, -(uint64_t)Offs))))
2330 continue;
2331 Formula NewF = F;
2332 NewF.AM.BaseOffs = Offs;
2333 if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
2334 continue;
2335 const SCEV *Diff = SE.getAddExpr(NegImmS, NewF.ScaledReg);
2336 if (Diff->isZero()) continue;
2337 NewF.ScaledReg = Diff;
Dan Gohman8b0ade32010-01-21 22:42:49 +00002338 (void)InsertFormula(LU, LUIdx, NewF);
Dan Gohmana10756e2010-01-21 02:09:26 +00002339 }
2340 // Use the immediate in a base register.
2341 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
2342 const SCEV *BaseReg = F.BaseRegs[N];
2343 if (BaseReg != OrigReg)
2344 continue;
2345 Formula NewF = F;
2346 NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
2347 if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
2348 continue;
2349 const SCEV *Diff = SE.getAddExpr(NegImmS, BaseReg);
2350 if (Diff->isZero()) continue;
2351 // Don't create 50 + reg(-50).
2352 if (Diff ==
2353 SE.getSCEV(ConstantInt::get(IntTy,
2354 -(uint64_t)NewF.AM.BaseOffs)))
2355 continue;
2356 NewF.BaseRegs[N] = Diff;
Dan Gohman8b0ade32010-01-21 22:42:49 +00002357 (void)InsertFormula(LU, LUIdx, NewF);
Dan Gohmana10756e2010-01-21 02:09:26 +00002358 }
2359 }
2360 }
Dale Johannesenc1acc3f2009-05-11 17:15:42 +00002361}
2362
Dan Gohmana10756e2010-01-21 02:09:26 +00002363/// GenerateAllReuseFormulae - Generate formulae for each use.
2364void
2365LSRInstance::GenerateAllReuseFormulae() {
2366 SmallVector<Formula, 12> Save;
2367 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2368 LSRUse &LU = Uses[LUIdx];
2369
2370 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2371 GenerateSymbolicOffsetReuse(LU, LUIdx, LU.Formulae[i]);
2372 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2373 GenerateICmpZeroScaledReuse(LU, LUIdx, LU.Formulae[i]);
2374 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2375 GenerateReassociationReuse(LU, LUIdx, LU.Formulae[i]);
2376 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2377 GenerateCombinationReuse(LU, LUIdx, LU.Formulae[i]);
2378 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2379 GenerateScaledReuse(LU, LUIdx, LU.Formulae[i]);
2380 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
2381 GenerateTruncateReuse(LU, LUIdx, LU.Formulae[i]);
2382 }
2383
2384 GenerateConstantOffsetReuse();
2385}
2386
/// GenerateLoopInvariantRegisterUses - Check for other uses of loop-invariant
/// values which we're tracking. These other uses will pin these values in
/// registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::GenerateLoopInvariantRegisterUses() {
  // Walk all SCEV expressions reachable from the tracked registers,
  // decomposing composite expressions until we reach the SCEVUnknown leaves,
  // which are the IR values whose other uses we care about.
  SmallVector<const SCEV *, 8> Worklist(RegSequence.begin(), RegSequence.end());

  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
      Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
    else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
      Worklist.push_back(C->getOperand());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = U->getValue();
      // Values defined inside the loop aren't loop-invariant; skip them.
      if (const Instruction *Inst = dyn_cast<Instruction>(V))
        if (L->contains(Inst)) continue;
      // Examine every user of the value, filtering down to the ones that
      // actually pin it in a register for the duration of the loop.
      for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
           UI != UE; ++UI) {
        const Instruction *UserInst = dyn_cast<Instruction>(*UI);
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        // For a PHI user, the point of use is the incoming block for this
        // operand, not the block containing the PHI itself.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType()) &&
            !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
          continue;
        // Ignore icmp instructions which are already being analyzed.
        // (!UI.getOperandNo() picks the icmp operand other than this use.)
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !UI.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
            continue;
        }

        // Record a trivial use for this user so the register-voting code
        // knows the value is live regardless of what LSR decides.
        LSRUse &LU = getNewUse();
        LU.UserInst = const_cast<Instruction *>(UserInst);
        LU.OperandValToReplace = UI.getUse();
        LU.InsertSupplementalFormula(U);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
      }
    }
  }
}
2449
2450#ifndef NDEBUG
2451
2452static void debug_winner(SmallVector<LSRUse, 16> const &Uses) {
2453 dbgs() << "LSR has selected formulae for each use:\n";
2454 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
2455 E = Uses.end(); I != E; ++I) {
2456 const LSRUse &LU = *I;
2457 dbgs() << " ";
2458 LU.print(dbgs());
2459 dbgs() << '\n';
2460 dbgs() << " ";
2461 LU.Formulae.front().print(dbgs());
2462 dbgs() << "\n";
2463 }
2464}
2465
2466#endif
2467
/// LSRInstance - Run loop strength reduction on the given loop: collect the
/// uses, generate candidate formulae, vote on profitable register reuse, and
/// rewrite the selected formulae into the IR. All work happens here in the
/// constructor; the result is queried via getChanged().
LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
  : IU(P->getAnalysis<IVUsers>()),
    SE(P->getAnalysis<ScalarEvolution>()),
    DT(P->getAnalysis<DominatorTree>()),
    TLI(tli), L(l), Changed(false), IVIncInsertPos(0),
    CurrentArbitraryRegIndex(0), MaxNumRegs(0) {

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm()) return;

  // If there's no interesting work to be done, bail early.
  if (IU.IVUsesByStride.empty()) return;

  DEBUG(dbgs() << "\nLSR on loop ";
        WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
        dbgs() << ":\n");

  // Sort the StrideOrder so we process larger strides first.
  std::stable_sort(IU.StrideOrder.begin(), IU.StrideOrder.end(),
                   StrideCompare(SE));

  // If the IV is used in an int-to-float cast inside the loop then try to
  // eliminate the cast operation.
  OptimizeShadowIV();

  // Change loop terminating condition to use the postinc iv when possible.
  Changed |= OptimizeLoopTermCond();

  // Visit each stride, collecting interesting types and factors and
  // recording a use for every IV user of that stride.
  for (SmallVectorImpl<const SCEV *>::const_iterator SIter =
       IU.StrideOrder.begin(), SEnd = IU.StrideOrder.end();
       SIter != SEnd; ++SIter) {
    const SCEV *Stride = *SIter;

    // Collect interesting types.
    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));

    // Collect interesting factors: for each pair of strides, if one evenly
    // divides the other the quotient is a candidate scale factor.
    for (SmallVectorImpl<const SCEV *>::const_iterator NewStrideIter =
         SIter + 1; NewStrideIter != SEnd; ++NewStrideIter) {
      const SCEV *OldStride = Stride;
      const SCEV *NewStride = *NewStrideIter;

      // Sign-extend the narrower stride so the division is done in a
      // common type.
      if (SE.getTypeSizeInBits(OldStride->getType()) !=
          SE.getTypeSizeInBits(NewStride->getType())) {
        if (SE.getTypeSizeInBits(OldStride->getType()) >
            SE.getTypeSizeInBits(NewStride->getType()))
          NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
        else
          OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
      }
      // Only remember factors that fit in 64 bits.
      if (const SCEVConstant *Factor =
            dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
                                                   SE, true)))
        if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
          Factors.insert(Factor->getValue()->getValue().getSExtValue());
    }

    std::map<const SCEV *, IVUsersOfOneStride *>::const_iterator SI =
      IU.IVUsesByStride.find(Stride);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
    for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; ++UI) {
      // Record the uses.
      LSRUse &LU = getNewUse();
      LU.UserInst = UI->getUser();
      LU.OperandValToReplace = UI->getOperandValToReplace();
      if (isAddressUse(LU.UserInst, LU.OperandValToReplace)) {
        LU.Kind = LSRUse::Address;
        LU.AccessTy = getAccessType(LU.UserInst);
      }
      if (UI->isUseOfPostIncrementedValue())
        LU.PostIncLoop = L;

      const SCEV *S = IU.getCanonicalExpr(*UI);

      // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
      // (N - i == 0), and this allows (N - i) to be the expression that we
      // work with rather than just N or i, so we can consider the register
      // requirements for both N and i at the same time. Limiting this code
      // to equality icmps is not a problem because all interesting loops
      // use equality icmps, thanks to IndVarSimplify.
      if (ICmpInst *CI = dyn_cast<ICmpInst>(LU.UserInst))
        if (CI->isEquality()) {
          // Swap the operands if needed to put the OperandValToReplace on
          // the left, for consistency.
          Value *NV = CI->getOperand(1);
          if (NV == LU.OperandValToReplace) {
            CI->setOperand(1, CI->getOperand(0));
            CI->setOperand(0, NV);
          }

          // x == y  -->  x - y == 0, provided y is loop-invariant.
          const SCEV *N = SE.getSCEV(NV);
          if (N->isLoopInvariant(L)) {
            LU.Kind = LSRUse::ICmpZero;
            S = SE.getMinusSCEV(N, S);
          }

          // -1 and the negations of all interesting strides (except the
          // negation of -1) are now also interesting.
          for (size_t i = 0, e = Factors.size(); i != e; ++i)
            if (Factors[i] != -1)
              Factors.insert(-(uint64_t)Factors[i]);
          Factors.insert(-1);
        }

      // Set up the initial formula for this use.
      LU.InsertInitialFormula(S, L, SE, DT);
      CountRegisters(LU.Formulae.back(), Uses.size() - 1);
    }
  }

  // If all uses use the same type, don't bother looking for truncation-based
  // reuse.
  if (Types.size() == 1)
    Types.clear();

  // If there are any uses of registers that we're tracking that have escaped
  // IVUsers' attention, add trivial uses for them, so that the register
  // voting process takes them into consideration.
  GenerateLoopInvariantRegisterUses();

  // Start by assuming we'll assign each use its own register. This is
  // sometimes called "full" strength reduction, or "superhero" mode.
  // Sometimes this is the best solution, but if there are opportunities for
  // reuse we may find a better solution.
  Score CurScore;
  CurScore.RateInitial(Uses, L, SE);

  MaxNumRegs = CurScore.getNumRegs();

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  // Sort the formulae. TODO: This is redundantly sorted below.
  for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), E = Uses.end();
       I != E; ++I) {
    LSRUse &LU = *I;
    std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
                     ComplexitySorter());
  }

  // Ok, we've now collected all the uses and noted their register uses. The
  // next step is to start looking at register reuse possibilities.
  DEBUG(print(dbgs()); dbgs() << '\n'; IU.dump());

  // Create a sorted list of registers with those with the most uses appearing
  // earlier in the list. We'll visit them first, as they're the most likely
  // to represent profitable reuse opportunities.
  SmallVector<RegCount, 8> RegOrder;
  for (SmallVectorImpl<const SCEV *>::const_iterator I =
       RegSequence.begin(), E = RegSequence.end(); I != E; ++I)
    RegOrder.push_back(RegCount(*I, RegUses.find(*I)->second));
  std::stable_sort(RegOrder.begin(), RegOrder.end());

  // Visit each register. Determine which ones represent profitable reuse
  // opportunities and remember them.
  // TODO: Extract this code into a function.
  for (SmallVectorImpl<RegCount>::const_iterator I = RegOrder.begin(),
       E = RegOrder.end(); I != E; ++I) {
    const SCEV *Reg = I->Reg;
    const SmallBitVector &Bits = I->Sort.Bits;

    // Registers with only one use don't represent reuse opportunities, so
    // when we get there, we're done. (RegOrder is sorted by use count.)
    if (Bits.count() <= 1) break;

    DEBUG(dbgs() << "Reg " << *Reg << ": ";
          I->Sort.print(dbgs());
          dbgs() << '\n');

    // Determine the total number of registers that will be needed if we make
    // use of the reuse opportunity represented by the current register.
    Score NewScore;
    NewScore.Rate(Reg, Bits, Uses, L, SE);

    // Now decide whether this register's reuse opportunity is an overall win.
    // Currently the decision is heavily skewed for register pressure.
    if (!(NewScore < CurScore)) {
      continue;
    }

    // Ok, use this opportunity.
    DEBUG(dbgs() << "This candidate has been accepted.\n");
    CurScore = NewScore;

    // Now that we've selected a new reuse opportunity, delete formulae that
    // do not participate in that opportunity.
    for (int j = Bits.find_first(); j != -1; j = Bits.find_next(j)) {
      LSRUse &LU = Uses[j];
      for (unsigned k = 0, h = LU.Formulae.size(); k != h; ++k) {
        Formula &F = LU.Formulae[k];
        if (!F.referencesReg(Reg)) {
          // Swap-and-pop; re-examine the swapped-in formula at index k.
          std::swap(LU.Formulae[k], LU.Formulae.back());
          LU.Formulae.pop_back();
          --k; --h;
        }
      }
      // Also re-sort the list to put the formulae with the fewest registers
      // at the front.
      // TODO: Do this earlier, we don't need it each time.
      std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
                       ComplexitySorter());
    }
  }

  // Ok, we've now made all our decisions. The first formula for each use
  // will be used.
  DEBUG(dbgs() << "Concluding, we need "; CurScore.print(dbgs());
        dbgs() << ".\n";
        debug_winner(Uses));

  // Free memory no longer needed.
  RegOrder.clear();
  Factors.clear();
  Types.clear();
  RegUses.clear();
  RegSequence.clear();

  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakVH, 16> DeadInsts;

  SCEVExpander Rewriter(SE);
  Rewriter.disableCanonicalMode();
  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Expand the new value definitions and update the users.
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    // Formulae should be legal.
    DEBUG(for (SmallVectorImpl<Formula>::const_iterator J = I->Formulae.begin(),
          JE = I->Formulae.end(); J != JE; ++J)
            assert(isLegalUse(J->AM, I->Kind, I->AccessTy, TLI) &&
                   "Illegal formula generated!"));

    // Expand the new code and update the user.
    I->Rewrite(L, IVIncInsertPos, Rewriter, DeadInsts, SE, DT, P);
    Changed = true;
  }

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
2716
2717void LSRInstance::print(raw_ostream &OS) const {
Dan Gohman8b0ade32010-01-21 22:42:49 +00002718 if (MaxNumRegs != 0)
2719 OS << "LSR is considering " << MaxNumRegs << " to be the maximum "
2720 "number of registers needed.\n";
2721
Dan Gohmana10756e2010-01-21 02:09:26 +00002722 OS << "LSR has identified the following interesting factors and types: ";
2723 bool First = true;
2724
2725 for (SmallSetVector<int64_t, 4>::const_iterator
2726 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
2727 if (!First) OS << ", ";
2728 First = false;
2729 OS << '*' << *I;
2730 }
2731
2732 for (SmallSetVector<const Type *, 4>::const_iterator
2733 I = Types.begin(), E = Types.end(); I != E; ++I) {
2734 if (!First) OS << ", ";
2735 First = false;
2736 OS << '(' << **I << ')';
2737 }
2738 OS << '\n';
2739
2740 OS << "LSR is examining the following uses, and candidate formulae:\n";
2741 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
2742 E = Uses.end(); I != E; ++I) {
2743 const LSRUse &LU = *I;
2744 dbgs() << " ";
2745 LU.print(OS);
2746 OS << '\n';
2747 for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
2748 JE = LU.Formulae.end(); J != JE; ++J) {
2749 OS << " ";
2750 J->print(OS);
2751 OS << "\n";
2752 }
2753 }
2754}
2755
/// dump - Print the LSR state to stderr; convenience entry point for use
/// from a debugger.
void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}
2759
namespace {

/// LoopStrengthReduce - The loop pass wrapper. The actual transformation is
/// delegated to LSRInstance, which is constructed once per loop in runOnLoop.
class LoopStrengthReduce : public LoopPass {
  /// TLI - Keep a pointer of a TargetLowering to consult for determining
  /// transformation profitability.
  const TargetLowering *const TLI;

public:
  static char ID; // Pass ID, replacement for typeid
  explicit LoopStrengthReduce(const TargetLowering *tli = NULL);

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM);
  void getAnalysisUsage(AnalysisUsage &AU) const;
};

}
2777
// Pass identification and registration under the "loop-reduce" name.
char LoopStrengthReduce::ID = 0;
static RegisterPass<LoopStrengthReduce>
X("loop-reduce", "Loop Strength Reduction");
2781
/// createLoopStrengthReducePass - Factory for the LSR pass. TLI may be null;
/// it is consulted for target addressing-mode legality when present.
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
  return new LoopStrengthReduce(TLI);
}
2785
/// LoopStrengthReduce constructor - Record the (possibly null) target
/// lowering info and register the pass ID.
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
  : LoopPass(&ID), TLI(tli) {}
2788
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addPreservedID(LoopSimplifyID);
  AU.addPreserved<LoopInfo>();
  AU.addPreserved("domfrontier");

  // LoopSimplify form is required (LSRInstance bails out without it), and
  // DominatorTree, ScalarEvolution, and IVUsers are both consumed and kept
  // up to date.
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<DominatorTree>();
  AU.addPreserved<DominatorTree>();
  AU.addRequired<ScalarEvolution>();
  AU.addPreserved<ScalarEvolution>();
  AU.addRequired<IVUsers>();
  AU.addPreserved<IVUsers>();
}
2804
2805bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
2806 bool Changed = false;
2807
2808 // Run the main LSR transformation.
2809 Changed |= LSRInstance(TLI, L, this).getChanged();
2810
Dan Gohmanafc36a92009-05-02 18:29:22 +00002811 // At this point, it is worth checking to see if any recurrence PHIs are also
Dan Gohman35738ac2009-05-04 22:30:44 +00002812 // dead, so that we can remove them as well.
Dan Gohman9fff2182010-01-05 16:31:45 +00002813 Changed |= DeleteDeadPHIs(L->getHeader());
Dan Gohmanafc36a92009-05-02 18:29:22 +00002814
Evan Cheng1ce75dc2008-07-07 19:51:32 +00002815 return Changed;
Nate Begemaneaa13852004-10-18 21:08:22 +00002816}