//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops
// that have the loop induction variable as one or more of their components;
// it rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>; however,
// it's useful to think about these as the same register, with some uses using
// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated.
//
// TODO: Handle multiple loops at a time.
//
// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is
//       that {0,+,1}<%bb> is getting picked first because all 7 uses can
//       use it, and while it's a pretty good solution, it means that LSR
//       doesn't look further to find an even better solution.
//
// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
//       instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
//       smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
//       multiple base registers, or as the increment expression in an addrec),
//       we may not actually need both reg and (-1 * reg) in registers; the
//       negation can be implemented by using a sub instead of an add. The
//       lack of support for taking this into consideration when making
//       register pressure decisions is partly worked around by the "Special"
//       use kind.
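//       (For example, x + (-1 * y) can be computed with a single
//       "sub x, y" rather than a negation followed by an add.)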
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

namespace {

// Constant strides come first; they in turn are sorted by their absolute
// values. If the absolute values are the same, then positive strides come
// first. e.g.
//   4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
  const ScalarEvolution &SE;
  explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}

  bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
    const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
    const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
    if (LHSC && RHSC) {
      unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
                                   SE.getTypeSizeInBits(RHS->getType()));
      APInt LV = LHSC->getValue()->getValue();
      APInt RV = RHSC->getValue()->getValue();
      LV.sextOrTrunc(BitWidth);
      RV.sextOrTrunc(BitWidth);
      APInt ALV = LV.abs();
      APInt ARV = RV.abs();
      if (ALV == ARV) {
        if (LV != RV)
          return LV.sgt(RV);
      } else {
        return ALV.ult(ARV);
      }

      // If it's the same value but a different type, sort by bit width so
      // that we emit larger induction variables before smaller
      // ones, letting the smaller ones be rewritten in terms of larger ones.
      return SE.getTypeSizeInBits(RHS->getType()) <
             SE.getTypeSizeInBits(LHS->getType());
    }
    return LHSC && !RHSC;
  }
};

/// RegSortData - This class holds data which is used to order reuse
/// candidates.
class RegSortData {
public:
  /// Bits - This represents the set of LSRUses (by index) which reference a
  /// particular register.
  SmallBitVector Bits;

  /// MaxComplexity - This represents the greatest complexity (see the comments
  /// on Formula::getComplexity) seen with a particular register.
  uint32_t MaxComplexity;

  /// Index - This holds an arbitrary value used as a last-resort tie breaker
  /// to ensure deterministic behavior.
  unsigned Index;

  RegSortData() {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << Bits.count()
     << ", MaxComplexity=";
  OS.write_hex(MaxComplexity);
  OS << ", Index=" << Index << ']';
}

void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// RegCount - This is a helper class to sort a given set of registers
/// according to associated RegSortData values.
class RegCount {
public:
  const SCEV *Reg;
  RegSortData Sort;

  RegCount(const SCEV *R, const RegSortData &RSD)
    : Reg(R), Sort(RSD) {}

  // Sort by count. Returning true means the register is preferred.
  bool operator<(const RegCount &Other) const {
    // Sort by the number of unique uses of this register.
    unsigned A = Sort.Bits.count();
    unsigned B = Other.Sort.Bits.count();
    if (A != B) return A > B;

    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
      // AddRecs have higher priority than other things.
      if (!BR) return true;

      // Prefer affine values.
      if (AR->isAffine() != BR->isAffine())
        return AR->isAffine();

      const Loop *AL = AR->getLoop();
      const Loop *BL = BR->getLoop();
      if (AL != BL) {
        unsigned ADepth = AL->getLoopDepth();
        unsigned BDepth = BL->getLoopDepth();
        // Prefer a less deeply nested addrec.
        if (ADepth != BDepth)
          return ADepth < BDepth;

        // Different loops at the same depth; do something arbitrary.
        BasicBlock *AH = AL->getHeader();
        BasicBlock *BH = BL->getHeader();
        for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
          if (&*I == BH) return true;
        return false;
      }

      // Sort addrecs by stride.
      const SCEV *AStep = AR->getOperand(1);
      const SCEV *BStep = BR->getOperand(1);
      if (AStep != BStep) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStep)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStep);
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Ignore the sign bit, assuming that striding by a negative value
          // is just as easy as by a positive value.
          // Prefer the addrec with the lesser absolute value stride, as it
          // will allow uses to have simpler addressing modes.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
      }

      // Then sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;

      // Then sort them by their start values.
      const SCEV *AStart = AR->getStart();
      const SCEV *BStart = BR->getStart();
      if (AStart != BStart) {
        if (const SCEVConstant *AC = dyn_cast<SCEVConstant>(AStart)) {
          const SCEVConstant *BC = dyn_cast<SCEVConstant>(BStart);
          if (!BC) return true;
          // Arbitrarily prefer wider registers.
          if (AC->getValue()->getValue().getBitWidth() !=
              BC->getValue()->getValue().getBitWidth())
            return AC->getValue()->getValue().getBitWidth() >
                   BC->getValue()->getValue().getBitWidth();
          // Prefer positive over negative if the absolute values are the same.
          if (AC->getValue()->getValue().abs() ==
              BC->getValue()->getValue().abs())
            return AC->getValue()->getValue().isStrictlyPositive();
          // Prefer the addrec with the lesser absolute value start.
          return AC->getValue()->getValue().abs()
                   .ult(BC->getValue()->getValue().abs());
        }
      }
    } else {
      // AddRecs have higher priority than other things.
      if (isa<SCEVAddRecExpr>(Other.Reg)) return false;
      // Sort by the register which will permit the simplest uses.
      // This is a heuristic; currently we only track the most complex use as a
      // representative.
      if (Sort.MaxComplexity != Other.Sort.MaxComplexity)
        return Sort.MaxComplexity < Other.Sort.MaxComplexity;
    }

    // Tie-breaker: the arbitrary index, to ensure a reliable ordering.
    return Sort.Index < Other.Sort.Index;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

void RegCount::print(raw_ostream &OS) const {
  OS << *Reg << ':';
  Sort.print(OS);
}

void RegCount::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// Formula - This class holds information that describes a formula for
/// satisfying a use. It may include broken-out immediates and scaled registers.
struct Formula {
  /// AM - This is used to represent complex addressing, as well as other kinds
  /// of interesting uses.
  TargetLowering::AddrMode AM;

  /// BaseRegs - The list of "base" registers for this use. When this is
  /// non-empty, AM.HasBaseReg should be set to true.
  SmallVector<const SCEV *, 2> BaseRegs;

  /// ScaledReg - The 'scaled' register for this use. This should be non-null
  /// when AM.Scale is not zero.
  const SCEV *ScaledReg;

  Formula() : ScaledReg(0) {}

  unsigned getNumRegs() const;
  uint32_t getComplexity() const;
  const Type *getType() const;

  void InitialMatch(const SCEV *S, Loop *L,
                    ScalarEvolution &SE, DominatorTree &DT);

  /// referencesReg - Test if this formula references the given register.
  bool referencesReg(const SCEV *S) const {
    return S == ScaledReg ||
           std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
  }

  bool operator==(const Formula &Other) const {
    return BaseRegs == Other.BaseRegs &&
           ScaledReg == Other.ScaledReg &&
           AM.Scale == Other.AM.Scale &&
           AM.BaseOffs == Other.AM.BaseOffs &&
           AM.BaseGV == Other.AM.BaseGV;
  }

  // This sorts at least partially based on host pointer values which are
  // not deterministic, so it is only usable for uniquification.
  bool operator<(const Formula &Other) const {
    if (BaseRegs != Other.BaseRegs)
      return BaseRegs < Other.BaseRegs;
    if (ScaledReg != Other.ScaledReg)
      return ScaledReg < Other.ScaledReg;
    if (AM.Scale != Other.AM.Scale)
      return AM.Scale < Other.AM.Scale;
    if (AM.BaseOffs != Other.AM.BaseOffs)
      return AM.BaseOffs < Other.AM.BaseOffs;
    if (AM.BaseGV != Other.AM.BaseGV)
      return AM.BaseGV < Other.AM.BaseGV;
    return false;
  }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

/// getNumRegs - Return the total number of register operands used by this
/// formula.
unsigned Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// getComplexity - Return an oversimplified value indicating the complexity
/// of this formula. This is used as a tie-breaker in choosing register
/// preferences.
uint32_t Formula::getComplexity() const {
  // Encode the information in a uint32_t so that comparing with operator<
  // will be interesting.
  return
    // Most significant, the number of registers. This saturates because we
    // need the bits, and because beyond a few hundred it doesn't really matter.
    (std::min(getNumRegs(), (1u<<15)-1) << 17) |
    // Having multiple base regs is worse than having a base reg and a scale.
    ((BaseRegs.size() > 1) << 16) |
    // Scale absolute value.
    ((AM.Scale != 0 ? (Log2_64(abs64(AM.Scale)) + 1) : 0u) << 9) |
    // Scale sign, which is less significant than the absolute value.
    ((AM.Scale < 0) << 8) |
    // Offset absolute value.
    ((AM.BaseOffs != 0 ? (Log2_64(abs64(AM.BaseOffs)) + 1) : 0u) << 1) |
    // If a GV is present, treat it like a maximal offset.
    ((AM.BaseGV ? ((1u<<7)-1) : 0) << 1) |
    // Offset sign, which is less significant than the absolute offset.
    ((AM.BaseOffs < 0) << 0);
}
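
// For illustration: a formula with two base registers, no scale, and
// BaseOffs == 8 encodes as
//   (2 << 17) | (1 << 16) | ((Log2_64(8) + 1) << 1) == 0x50008,
// so a difference in register count always outweighs any difference in
// scale, offset, or sign.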

/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
const Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         AM.BaseGV ? AM.BaseGV->getType() :
         0;
}

namespace {

/// ComplexitySorter - A predicate which orders Formulae by the number of
/// registers they contain.
struct ComplexitySorter {
  bool operator()(const Formula &LHS, const Formula &RHS) const {
    unsigned L = LHS.getNumRegs();
    unsigned R = RHS.getNumRegs();
    if (L != R) return L < R;

    return LHS.getComplexity() < RHS.getComplexity();
  }
};

}

/// DoInitialMatch - Recursion helper for InitialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE, DominatorTree &DT) {
  // Collect expressions which properly dominate the loop header.
  if (S->properlyDominates(L->getHeader(), &DT)) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I)
      DoInitialMatch(*I, L, Good, Bad, SE, DT);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (!AR->getStart()->isZero()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
      DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
                                      AR->getStepRecurrence(SE),
                                      AR->getLoop()),
                     L, Good, Bad, SE, DT);
      return;
    }
  }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
           E = MyGood.end(); I != E; ++I)
        Good.push_back(SE.getMulExpr(NegOne, *I));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
           E = MyBad.end(); I != E; ++I)
        Bad.push_back(SE.getMulExpr(NegOne, *I));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
void Formula::InitialMatch(const SCEV *S, Loop *L,
                           ScalarEvolution &SE, DominatorTree &DT) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE, DT);
  if (!Good.empty()) {
    BaseRegs.push_back(SE.getAddExpr(Good));
    AM.HasBaseReg = true;
  }
  if (!Bad.empty()) {
    BaseRegs.push_back(SE.getAddExpr(Bad));
    AM.HasBaseReg = true;
  }
}
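
// For example: matching S = {@g,+,4}<%L> sends the start value @g, which
// properly dominates the loop header, to the "good" list and the remainder
// {0,+,4}<%L> to the "bad" list, producing a formula with base registers
// reg(@g) + reg({0,+,4}<%L>). (Illustrative example; @g stands for an
// arbitrary loop-invariant global.)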

void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (AM.BaseGV) {
    if (!First) OS << " + "; else First = false;
    WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
  }
  if (AM.BaseOffs != 0) {
    if (!First) OS << " + "; else First = false;
    OS << AM.BaseOffs;
  }
  for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
       E = BaseRegs.end(); I != E; ++I) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(";
    OS << **I;
    OS << ")";
  }
  if (AM.Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << AM.Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ")";
  }
}

void Formula::dump() const {
  print(errs()); errs() << '\n';
}

/// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
/// or null otherwise. If IgnoreSignificantBits is true, expressions like
/// (X * Y) /s Y are simplified to X, ignoring that the multiplication may
/// overflow, which is useful when the result will be used in a context where
/// the most significant bits are ignored.
static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
                           ScalarEvolution &SE,
                           bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getIntegerSCEV(1, LHS->getType());

  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
  // folding.
  if (RHS->isAllOnesValue())
    return SE.getMulExpr(LHS, RHS);

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
    if (!RC)
      return 0;
    if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
      return 0;
    return SE.getConstant(C->getValue()->getValue()
               .sdiv(RC->getValue()->getValue()));
  }

  // Distribute the sdiv over addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
                                IgnoreSignificantBits);
    if (!Start) return 0;
    const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
                               IgnoreSignificantBits);
    if (!Step) return 0;
    return SE.getAddRecExpr(Start, Step, AR->getLoop());
  }

  // Distribute the sdiv over add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    SmallVector<const SCEV *, 8> Ops;
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I) {
      const SCEV *Op = getSDiv(*I, RHS, SE,
                               IgnoreSignificantBits);
      if (!Op) return 0;
      Ops.push_back(Op);
    }
    return SE.getAddExpr(Ops);
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
    if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
           I != E; ++I) {
        if (!Found)
          if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) {
            Ops.push_back(Q);
            Found = true;
            continue;
          }
        Ops.push_back(*I);
      }
      return Found ? SE.getMulExpr(Ops) : 0;
    }

  // Otherwise we don't know.
  return 0;
}
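
// Illustrative examples of the cases above (assuming %x and %y are simple
// loop-invariant values):
//   getSDiv({8,+,4}<%L>, 4, SE)  == {2,+,1}<%L>  (distributed over the addrec)
//   getSDiv(7, 2, SE)            == null         (7 srem 2 != 0)
//   getSDiv((%x * %y), %y, SE)   == %x, but only when the multiply is known
//                                   not to sign-wrap or IgnoreSignificantBits
//                                   is set.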

namespace {

/// LSRUse - This class holds the state that LSR keeps for each use in
/// IVUsers, as well as uses invented by LSR itself. It includes information
/// about what kinds of things can be folded into the user, information
/// about the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
  SmallSet<Formula, 8> FormulaeUniquifier;

public:
  /// KindType - An enum for a kind of use, indicating what types of
  /// scaled and immediate operands it might support.
  enum KindType {
    Basic,   ///< A normal use, with no folding.
    Special, ///< A special case of basic, allowing -1 scales.
    Address, ///< An address use; folding according to TargetLowering
    ICmpZero ///< An equality icmp with both operands folded into one.
    // TODO: Add a generic icmp too?
  };

  KindType Kind;
  const Type *AccessTy;
  Instruction *UserInst;
  Value *OperandValToReplace;

  /// PostIncLoop - If this user is to use the post-incremented value of an
  /// induction variable, this variable is non-null and holds the loop
  /// associated with the induction variable.
  const Loop *PostIncLoop;

  /// Formulae - A list of ways to build a value that can satisfy this user.
  /// After the list is populated, one of these is selected heuristically and
  /// used to formulate a replacement for OperandValToReplace in UserInst.
  SmallVector<Formula, 12> Formulae;

  LSRUse() : Kind(Basic), AccessTy(0),
             UserInst(0), OperandValToReplace(0), PostIncLoop(0) {}

  void InsertInitialFormula(const SCEV *S, Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT);
  void InsertSupplementalFormula(const SCEV *S);

  bool InsertFormula(const Formula &F);

  void Rewrite(Loop *L, SCEVExpander &Rewriter,
               SmallVectorImpl<WeakVH> &DeadInsts,
               ScalarEvolution &SE, DominatorTree &DT,
               Pass *P) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  Value *Expand(BasicBlock::iterator IP,
                Loop *L, SCEVExpander &Rewriter,
                SmallVectorImpl<WeakVH> &DeadInsts,
                ScalarEvolution &SE, DominatorTree &DT) const;
};

}

/// ExtractImmediate - If S involves the addition of a constant integer value,
/// return that integer value, and mutate S to point to a new SCEV with that
/// value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    if (C->getValue()->getValue().getMinSignedBits() <= 64) {
      S = SE.getIntegerSCEV(0, C->getType());
      return C->getValue()->getSExtValue();
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    S = SE.getAddRecExpr(NewOps, AR->getLoop());
    return Result;
  }
  return 0;
}

/// ExtractSymbol - If S involves the addition of a GlobalValue address,
/// return that symbol, and mutate S to point to a new SCEV with that
/// value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
      S = SE.getIntegerSCEV(0, GV->getType());
      return GV;
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
    S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
    S = SE.getAddRecExpr(NewOps, AR->getLoop());
    return Result;
  }
  return 0;
}
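
// For example: with S = (16 + @g), ExtractImmediate returns 16 and leaves
// S == @g, and a subsequent ExtractSymbol returns @g and leaves S at zero.
// This is how isAlwaysFoldable (below) strips a candidate expression down
// to check whether anything unfoldable remains. (Illustrative example.)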

/// isLegalUse - Test whether the use described by AM is "legal", meaning
/// it can be completely folded into the user instruction at isel time.
/// This includes address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetLowering::AddrMode &AM,
                       LSRUse::KindType Kind, const Type *AccessTy,
                       const TargetLowering *TLI) {
  switch (Kind) {
  case LSRUse::Address:
    // If we have low-level target information, ask the target if it can
    // completely fold this address.
    if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);

    // Otherwise, just guess that reg+reg addressing is legal.
    return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;

  case LSRUse::ICmpZero:
    // There's not even a target hook for querying whether it would be legal
    // to fold a GV into an ICmp.
    if (AM.BaseGV)
      return false;

    // ICmp only has two operands; don't allow more than two non-trivial parts.
    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
      return false;

    // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale
    // by putting the scaled register in the other operand of the icmp.
    if (AM.Scale != 0 && AM.Scale != -1)
      return false;

    // If we have low-level target information, ask the target if it can
    // fold an integer immediate on an icmp.
    if (AM.BaseOffs != 0) {
      if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
      return false;
    }

    return true;

  case LSRUse::Basic:
    // Only handle single-register values.
    return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;

  case LSRUse::Special:
    // Only handle -1 scales, or no scale.
    return AM.Scale == 0 || AM.Scale == -1;
  }

  return false;
}
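
// For illustration (assuming an x86-like target): an Address use with no
// BaseGV, HasBaseReg == true, Scale == 4, and BaseOffs == 8 is typically
// legal, since it matches an addressing mode like [reg + 4*reg + 8]; the
// same AddrMode is rejected for an ICmpZero use, where only 0 and -1
// scales can be folded.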

static bool isAlwaysFoldable(const SCEV *S,
                             bool HasBaseReg,
                             LSRUse::KindType Kind, const Type *AccessTy,
                             const TargetLowering *TLI,
                             ScalarEvolution &SE) {
  // Fast-path: zero is always foldable.
  if (S->isZero()) return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  TargetLowering::AddrMode AM;
  AM.BaseOffs = ExtractImmediate(S, SE);
  AM.BaseGV = ExtractSymbol(S, SE);
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  // If there's anything else involved, it's not foldable.
  if (!S->isZero()) return false;

  return isLegalUse(AM, Kind, AccessTy, TLI);
}

/// InsertFormula - If the given formula has not yet been inserted, add it
/// to the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
  Formula Copy = F;

  // Sort the base regs, to avoid adding the same solution twice with
  // the base regs in different orders. This uses host pointer values, but
  // it doesn't matter since it's only used for uniquifying.
  std::sort(Copy.BaseRegs.begin(), Copy.BaseRegs.end());

  DEBUG(for (SmallVectorImpl<const SCEV *>::const_iterator I =
             F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
          assert(!(*I)->isZero() && "Zero allocated in a base register!");
        assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
               "Zero allocated in a scaled register!"));

  if (FormulaeUniquifier.insert(Copy)) {
    Formulae.push_back(F);
    return true;
  }

  return false;
}

void
LSRUse::InsertInitialFormula(const SCEV *S, Loop *L,
                             ScalarEvolution &SE, DominatorTree &DT) {
  Formula F;
  F.InitialMatch(S, L, SE, DT);
  bool Inserted = InsertFormula(F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

void
LSRUse::InsertSupplementalFormula(const SCEV *S) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.AM.HasBaseReg = true;
  bool Inserted = InsertFormula(F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// getImmediateDominator - A handy utility for the specific DominatorTree
/// query that we need here.
///
static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
  DomTreeNode *Node = DT.getNode(BB);
  if (!Node) return 0;
  Node = Node->getIDom();
  if (!Node) return 0;
  return Node->getBlock();
}

Value *LSRUse::Expand(BasicBlock::iterator IP,
                      Loop *L, SCEVExpander &Rewriter,
                      SmallVectorImpl<WeakVH> &DeadInsts,
                      ScalarEvolution &SE, DominatorTree &DT) const {
  // First, collect some instructions which we will remain dominated by when
  // expanding the replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(OperandValToReplace))
    Inputs.push_back(I);
  if (Kind == ICmpZero)
    if (Instruction *I =
          dyn_cast<Instruction>(cast<ICmpInst>(UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (PostIncLoop && !L->contains(UserInst))
    Inputs.push_back(L->getLoopLatch()->getTerminator());

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  for (;;) {
    bool AllDominate = true;
    Instruction *BetterPos = 0;
    BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
    if (!IDom) break;
    Instruction *Tentative = IDom->getTerminator();
    for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
         E = Inputs.end(); I != E; ++I) {
      Instruction *Inst = *I;
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      if (IDom == Inst->getParent() &&
          (!BetterPos || DT.dominates(BetterPos, Inst)))
        BetterPos = next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos;
    else
      IP = Tentative;
  }
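  // Don't insert code between PHI nodes and the block's first ordinary
  // instruction; advance past any PHIs.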
  while (isa<PHINode>(IP)) ++IP;

  // The first formula in the list is the winner.
  const Formula &F = Formulae.front();

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
  Rewriter.setPostInc(PostIncLoop);

  // This is the type that the user actually needs.
  const Type *OpTy = OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  const Type *Ty = F.getType();
  if (!Ty)
    // No type known; just expand directly to the ultimate type.
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;
  // This is the type to do integer arithmetic in.
  const Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;

  // Expand the BaseRegs portion.
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg))
      if (AR->getLoop() == PostIncLoop)
        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));

    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = 0;
  if (F.AM.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

    // If we're expanding for a post-inc user for the add-rec's loop, make the
    // post-inc adjustment.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
      if (AR->getLoop() == PostIncLoop)
        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));

    if (Kind == ICmpZero) {
      // An interesting way of "folding" with an icmp is to use a negated
      // scale, which we'll implement by inserting it into the other operand
      // of the icmp.
      assert(F.AM.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
      const Type *ScaledTy = SE.getEffectiveSCEVType(ScaledS->getType());
      ScaledS = SE.getMulExpr(ScaledS,
                              SE.getSCEV(ConstantInt::get(ScaledTy,
                                                          F.AM.Scale)));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the immediate portions.
  if (F.AM.BaseGV)
    Ops.push_back(SE.getSCEV(F.AM.BaseGV));
  if (F.AM.BaseOffs != 0) {
    if (Kind == ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
      if (!ICmpScaledV)
        ICmpScaledV = ConstantInt::get(IntTy, -F.AM.BaseOffs);
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, F.AM.BaseOffs);
      }
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
      Ops.push_back(SE.getSCEV(ConstantInt::get(IntTy, F.AM.BaseOffs)));
    }
  }

  // Emit instructions summing all the operands.
  const SCEV *FullS = Ops.empty() ?
                      SE.getIntegerSCEV(0, IntTy) :
                      SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);

  // We're done expanding now, so reset the rewriter.
  Rewriter.setPostInc(0);

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
  // form, update the ICmp's other operand.
  if (Kind == ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(UserInst);
    DeadInsts.push_back(CI->getOperand(1));
    assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
                           "a scale at the same time!");
    if (F.AM.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
                                                   OpTy, false),
                           ICmpScaledV, OpTy, "tmp", CI);
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      assert(F.AM.Scale == 0 &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)F.AM.BaseOffs);
      if (C->getType() != OpTy)
        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                          OpTy, false),
                                  C, OpTy);

      CI->setOperand(1, C);
    }
  }

  return FullV;
}

/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRUse::Rewrite(Loop *L, SCEVExpander &Rewriter,
                     SmallVectorImpl<WeakVH> &DeadInsts,
                     ScalarEvolution &SE, DominatorTree &DT,
                     Pass *P) const {
  const Type *OpTy = OperandValToReplace->getType();

  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    DenseMap<BasicBlock *, Value *> Inserted;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace) {
        BasicBlock *BB = PN->getIncomingBlock(i);

        // If this is a critical edge, split the edge so that we do not insert
        // the code on all predecessor/successor paths. We do this unless this
        // is the canonical backedge for this loop, which complicates post-inc
        // users.
        if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
            !isa<IndirectBrInst>(BB->getTerminator()) &&
            (PN->getParent() != L->getHeader() || !L->contains(BB))) {
          // Split the critical edge.
          BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);

          // If PN is outside of the loop and BB is in the loop, we want to
          // move the block to be immediately before the PHI block, not
          // immediately after BB.
          if (L->contains(BB) && !L->contains(PN))
            NewBB->moveBefore(PN->getParent());

          // Splitting the edge can reduce the number of PHI entries we have.
          e = PN->getNumIncomingValues();
          BB = NewBB;
          i = PN->getBasicBlockIndex(BB);
        }

        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
        if (!Pair.second)
          PN->setIncomingValue(i, Pair.first->second);
        else {
          Value *FullV = Expand(BB->getTerminator(),
                                L, Rewriter, DeadInsts, SE, DT);

          // If this is reuse-by-noop-cast, insert the noop cast.
          if (FullV->getType() != OpTy)
            FullV =
              CastInst::Create(CastInst::getCastOpcode(FullV, false,
                                                       OpTy, false),
                               FullV, OperandValToReplace->getType(),
                               "tmp", BB->getTerminator());

          PN->setIncomingValue(i, FullV);
          Pair.first->second = FullV;
        }
      }
  } else {
    Value *FullV = Expand(UserInst, L, Rewriter, DeadInsts, SE, DT);

    // If this is reuse-by-noop-cast, insert the noop cast.
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", UserInst);
      FullV = Cast;
    }

    // Update the user.
    UserInst->replaceUsesOfWith(OperandValToReplace, FullV);
  }

  DeadInsts.push_back(OperandValToReplace);
}

void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  switch (Kind) {
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
  case Address:
    OS << "Address of ";
    if (isa<PointerType>(AccessTy))
      OS << "pointer"; // the full pointer type could be really verbose
    else
      OS << *AccessTy;
  }

  OS << ", UserInst=";
  // Store is common and interesting enough to be worth special-casing.
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    OS << "store ";
    WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
  else
    WriteAsOperand(OS, UserInst, /*PrintType=*/false);

  OS << ", OperandValToReplace=";
  WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);

  if (PostIncLoop) {
    OS << ", PostIncLoop=";
    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
  }
}

void LSRUse::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

/// Score - This class is used to measure and compare candidate formulae.
class Score {
  unsigned NumRegs;
  unsigned NumPhis;
  unsigned NumIVMuls;
  unsigned NumBaseAdds;
  unsigned NumImms;

public:
  Score()
    : NumRegs(0), NumPhis(0), NumIVMuls(0), NumBaseAdds(0), NumImms(0) {}

  void RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                   ScalarEvolution &SE);

  void Rate(const SCEV *Reg, const SmallBitVector &Bits,
            const SmallVector<LSRUse, 16> &Uses, const Loop *L,
            ScalarEvolution &SE);

  bool operator<(const Score &Other) const;

  void print_details(raw_ostream &OS, const SCEV *Reg,
                     const SmallPtrSet<const SCEV *, 8> &Regs) const;

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  void RateRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 8> &Regs,
                    const Loop *L);
  void RateFormula(const Formula &F, SmallPtrSet<const SCEV *, 8> &Regs,
                   const Loop *L);

  void Loose();
};

}

/// RateRegister - Tally up interesting quantities from the given register.
void Score::RateRegister(const SCEV *Reg,
                         SmallPtrSet<const SCEV *, 8> &Regs,
                         const Loop *L) {
  if (Regs.insert(Reg))
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
      NumPhis += AR->getLoop() == L;

      // Add the step value register, if it needs one.
      if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
        RateRegister(AR->getOperand(1), Regs, L);
    }
}

void Score::RateFormula(const Formula &F,
                        SmallPtrSet<const SCEV *, 8> &Regs,
                        const Loop *L) {
  // Tally up the registers.
  if (F.ScaledReg)
    RateRegister(F.ScaledReg, Regs, L);
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I) {
    const SCEV *BaseReg = *I;
    RateRegister(BaseReg, Regs, L);

    NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
                 BaseReg->hasComputableLoopEvolution(L);
  }

  if (F.BaseRegs.size() > 1)
    NumBaseAdds += F.BaseRegs.size() - 1;

  // Tally up the non-zero immediates.
  if (F.AM.BaseGV || F.AM.BaseOffs != 0)
    ++NumImms;
}

/// Loose - Set this score to a losing value.
void Score::Loose() {
  NumRegs = ~0u;
  NumPhis = ~0u;
  NumIVMuls = ~0u;
  NumBaseAdds = ~0u;
  NumImms = ~0u;
}

/// RateInitial - Compute a score for the initial "fully reduced" solution.
void Score::RateInitial(SmallVector<LSRUse, 16> const &Uses, const Loop *L,
                        ScalarEvolution &SE) {
  SmallPtrSet<const SCEV *, 8> Regs;
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I)
    RateFormula(I->Formulae.front(), Regs, L);
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), 0, Regs));
}

/// Rate - Compute a score for the solution where the reuse associated with
/// putting Reg in a register is selected.
void Score::Rate(const SCEV *Reg, const SmallBitVector &Bits,
                 const SmallVector<LSRUse, 16> &Uses, const Loop *L,
                 ScalarEvolution &SE) {
  SmallPtrSet<const SCEV *, 8> Regs;
  for (size_t i = 0, e = Uses.size(); i != e; ++i) {
    const LSRUse &LU = Uses[i];

    const Formula *BestFormula = 0;
    if (i >= Bits.size() || !Bits.test(i))
      // This use doesn't use the current register. Just go with the current
      // leading candidate formula.
      BestFormula = &LU.Formulae.front();
    else
      // Find the best formula for this use that uses the current register.
      for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
           E = LU.Formulae.end(); I != E; ++I) {
        const Formula &F = *I;
        if (F.referencesReg(Reg) &&
            (!BestFormula || ComplexitySorter()(F, *BestFormula)))
          BestFormula = &F;
      }

    // If we didn't find *any* formulae, because earlier we eliminated some
    // in greedy fashion, skip the current register's reuse opportunity.
    if (!BestFormula) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " wouldn't help any users.\n");
      Loose();
      return;
    }

    // For an in-loop post-inc user, don't allow multiple base registers,
    // because that would require an awkward in-loop add after the increment.
    if (LU.PostIncLoop && LU.PostIncLoop->contains(LU.UserInst) &&
        BestFormula->BaseRegs.size() > 1) {
      DEBUG(dbgs() << "Reuse with reg " << *Reg
                   << " would require an in-loop post-inc add: ";
            BestFormula->dump());
      Loose();
      return;
    }

    RateFormula(*BestFormula, Regs, L);
  }
  NumRegs += Regs.size();

  DEBUG(print_details(dbgs(), Reg, Regs));
}

/// operator< - Choose the better score.
bool Score::operator<(const Score &Other) const {
  if (NumRegs != Other.NumRegs)
    return NumRegs < Other.NumRegs;
  if (NumPhis != Other.NumPhis)
    return NumPhis < Other.NumPhis;
  if (NumIVMuls != Other.NumIVMuls)
    return NumIVMuls < Other.NumIVMuls;
  if (NumBaseAdds != Other.NumBaseAdds)
    return NumBaseAdds < Other.NumBaseAdds;
  return NumImms < Other.NumImms;
}
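
// Note that the comparison is lexicographic: a solution using fewer
// registers always wins, no matter how many IV multiplies, base adds, or
// immediates it needs; the later counts only break ties among the earlier
// ones.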

void Score::print_details(raw_ostream &OS,
                          const SCEV *Reg,
                          const SmallPtrSet<const SCEV *, 8> &Regs) const {
  if (Reg) OS << "Reuse with reg " << *Reg << " would require ";
  else     OS << "The initial solution would require ";
  print(OS);
  OS << ". Regs:";
  for (SmallPtrSet<const SCEV *, 8>::const_iterator I = Regs.begin(),
       E = Regs.end(); I != E; ++I)
    OS << ' ' << **I;
  OS << '\n';
}

void Score::print(raw_ostream &OS) const {
  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
  if (NumPhis != 0)
    OS << ", including " << NumPhis << " PHI" << (NumPhis == 1 ? "" : "s");
  if (NumIVMuls != 0)
    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
  if (NumBaseAdds != 0)
    OS << ", plus " << NumBaseAdds << " base add"
       << (NumBaseAdds == 1 ? "" : "s");
  if (NumImms != 0)
    OS << ", plus " << NumImms << " imm" << (NumImms == 1 ? "" : "s");
}

void Score::dump() const {
  print(errs()); errs() << '\n';
}

/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getOperand(1) == OperandVal)
      isAddress = true;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::prefetch:
    case Intrinsic::x86_sse2_loadu_dq:
    case Intrinsic::x86_sse2_loadu_pd:
    case Intrinsic::x86_sse_loadu_ps:
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      if (II->getOperand(1) == OperandVal)
        isAddress = true;
      break;
    }
  }
  return isAddress;
}

/// getAccessType - Return the type of the memory being accessed.
static const Type *getAccessType(const Instruction *Inst) {
  const Type *AccessTy = Inst->getType();
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
    AccessTy = SI->getOperand(0)->getType();
  else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      AccessTy = II->getOperand(1)->getType();
      break;
    }
  }
  return AccessTy;
}

/// DeleteTriviallyDeadInstructions - If any of the instructions in the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
  bool Changed = false;

  while (!DeadInsts.empty()) {
    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());

    if (I == 0 || !isInstructionTriviallyDead(I))
      continue;

    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
        *OI = 0;
        if (U->use_empty())
          DeadInsts.push_back(U);
      }

    I->eraseFromParent();
    Changed = true;
  }

  return Changed;
}

namespace {

/// LSRInstance - This class holds state for the main loop strength
/// reduction logic.
class LSRInstance {
  IVUsers &IU;
  ScalarEvolution &SE;
  DominatorTree &DT;
  const TargetLowering *const TLI;
  Loop *const L;
  bool Changed;

  /// CurrentArbitraryRegIndex - To ensure a deterministic ordering, assign an
  /// arbitrary index value to each register as a sort tie breaker.
  unsigned CurrentArbitraryRegIndex;

  /// Factors - Interesting factors between use strides.
  SmallSetVector<int64_t, 4> Factors;

  /// Types - Interesting use types, to facilitate truncation reuse.
  SmallSetVector<const Type *, 4> Types;

  /// Uses - The list of interesting uses.
  SmallVector<LSRUse, 16> Uses;

  // TODO: Reorganize these data structures.
  typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
  RegUsesTy RegUses;
  SmallVector<const SCEV *, 16> RegSequence;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                         const SCEV *&CondStride);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse);
  bool StrideMightBeShared(const SCEV *Stride);
  bool OptimizeLoopTermCond(Instruction *&IVIncInsertPos);

  LSRUse &getNewUse() {
    Uses.push_back(LSRUse());
    return Uses.back();
  }

  void CountRegister(const SCEV *Reg, uint32_t Complexity, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);

  void GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
                                   Formula Base);
  void GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
                                   Formula Base);
  void GenerateFormulaeFromReplacedBaseReg(LSRUse &LU,
                                           unsigned LUIdx,
                                           const Formula &Base, unsigned i,
                                           const SmallVectorImpl<const SCEV *>
                                             &AddOps);
  void GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
                                  Formula Base);
  void GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
                                Formula Base);
  void GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
                           Formula Base);
  void GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
                             Formula Base);

  void GenerateConstantOffsetReuse();

  void GenerateAllReuseFormulae();

  void GenerateLoopInvariantRegisterUses();

public:
  LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);

  bool getChanged() const { return Changed; }

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

/// OptimizeShadowIV - If IV is used in an int-to-float cast
/// inside the loop, then try to eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (size_t StrideIdx = 0, e = IU.StrideOrder.size();
       StrideIdx != e; ++StrideIdx) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
    if (!isa<SCEVConstant>(SI->first))
      continue;

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; /* empty */) {
      ilist<IVStrideUse>::iterator CandidateUI = UI;
      ++UI;
      Instruction *ShadowUse = CandidateUI->getUser();
      const Type *DestTy = NULL;

      /* If shadow use is an int->float cast, then insert a second IV
         to eliminate this cast.

           for (unsigned i = 0; i < n; ++i)
             foo((double)i);

         is transformed into

           double d = 0.0;
           for (unsigned i = 0; i < n; ++i, ++d)
             foo(d);
      */
      if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
        DestTy = UCast->getDestTy();
      else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
        DestTy = SCast->getDestTy();
      if (!DestTy) continue;

      if (TLI) {
        // If target does not support DestTy natively then do not apply
        // this transformation.
        EVT DVT = TLI->getValueType(DestTy);
        if (!TLI->isTypeLegal(DVT)) continue;
      }

      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
      if (!PH) continue;
      if (PH->getNumIncomingValues() != 2) continue;

      const Type *SrcTy = PH->getType();
      int Mantissa = DestTy->getFPMantissaWidth();
      if (Mantissa == -1) continue;
      if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
        continue;

      unsigned Entry, Latch;
      if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
        Entry = 0;
        Latch = 1;
      } else {
        Entry = 1;
        Latch = 0;
      }

      ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
      if (!Init) continue;
      Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());

      BinaryOperator *Incr =
        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
      if (!Incr) continue;
      if (Incr->getOpcode() != Instruction::Add
          && Incr->getOpcode() != Instruction::Sub)
        continue;

      /* Initialize new IV, double d = 0.0 in above example. */
      ConstantInt *C = NULL;
      if (Incr->getOperand(0) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1529 C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1530 else if (Incr->getOperand(1) == PH)
1531 C = dyn_cast<ConstantInt>(Incr->getOperand(0));
1532 else
1533 continue;
1534
1535 if (!C) continue;
1536
Dan Gohmana8225082009-10-26 15:32:57 +00001537 // Ignore negative constants, as the code below doesn't handle them
1538 // correctly. TODO: Remove this restriction.
1539 if (!C->getValue().isStrictlyPositive()) continue;
1540
Devang Patela0b39092008-08-26 17:57:54 +00001541 /* Add new PHINode. */
1542 PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
1543
Devang Patel54153272008-08-27 17:50:18 +00001544 /* create new increment. '++d' in above example. */
Owen Anderson6f83c9c2009-07-27 20:59:43 +00001545 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
Jim Grosbach56a1f802009-11-17 17:53:56 +00001546 BinaryOperator *NewIncr =
Dan Gohmanae3a0be2009-06-04 22:49:04 +00001547 BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
1548 Instruction::FAdd : Instruction::FSub,
Devang Patela0b39092008-08-26 17:57:54 +00001549 NewPH, CFP, "IV.S.next.", Incr);
1550
1551 NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
1552 NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
1553
1554 /* Remove cast operation */
Devang Patela0b39092008-08-26 17:57:54 +00001555 ShadowUse->replaceAllUsesWith(NewPH);
1556 ShadowUse->eraseFromParent();
Devang Patela0b39092008-08-26 17:57:54 +00001557 break;
1558 }
1559 }
1560}
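
// For illustration only (not part of the original source): a hypothetical
// sketch of the IR-level effect of OptimizeShadowIV on an i32 IV that feeds
// a uitofp, assuming the increment constant is the positive value 1:
//
//   %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
//   %d = uitofp i32 %i to double           ; cast to be eliminated
//
// becomes, after inserting the parallel floating-point "shadow" IV:
//
//   %IV.S. = phi double [ 0.0, %entry ], [ %IV.S.next., %latch ]
//   %IV.S.next. = fadd double %IV.S., 1.0
//   ; all uses of %d are replaced with %IV.S., and the cast is erased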

/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
                                    IVStrideUse *&CondUse,
                                    const SCEV* &CondStride) {
  for (unsigned StrideIdx = 0, e = IU.StrideOrder.size();
       StrideIdx != e && !CondUse; ++StrideIdx) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[StrideIdx]);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
           E = SI->second->Users.end(); UI != E; ++UI)
      if (UI->getUser() == Cond) {
        // NOTE: We could handle setcc instructions with multiple uses here,
        // but InstCombine already handles the simple uses, and it's not
        // clear that the remaining cases occur often enough in real life to
        // be worth handling.
        CondUse = UI;
        CondStride = SI->first;
        return true;
      }
  }
  return false;
}

/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       p[i] = 0.0;
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     p[i] = 0.0;
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting loops of this type,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType());

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);

  // Check for a max calculation that matches the pattern.
  if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
    return Cond;
  const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
  if (Max != SE.getSCEV(Sel)) return Cond;

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
  if (!MaxLHS || MaxLHS != One) return Cond;
  // Check the relevant induction variable for conformance to
  // the pattern.
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = 0;
  if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  if (!NewRHS) return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  CmpInst::Predicate Pred =
    isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond =
    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}
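
// For illustration only (not from the original source): assuming a loop whose
// trip count SCEV is (1 smax %n) and whose exit-test operand has the SCEV
// {1,+,1}, the rewrite sketched here turns
//
//   %max = select i1 %cmp, i32 1, i32 %n     ; max = n < 1 ? 1 : n
//   %exitcond = icmp ne i32 %i.next, %max
//
// back into
//
//   %exitcond = icmp slt i32 %i.next, %n     ; "scmp"
//
// and deletes the select (and its compare, if the compare has no other uses).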

bool LSRInstance::StrideMightBeShared(const SCEV* Stride) {
  int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
  for (unsigned i = 0, e = IU.StrideOrder.size(); i != e; ++i) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[i]);
    const SCEV *Share = SI->first;
    if (!isa<SCEVConstant>(SI->first) || Share == Stride)
      continue;
    int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
    if (SSInt == SInt)
      return true; // This can definitely be reused.
    if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
      continue;
    int64_t Scale = SSInt / SInt;

    // This AM will be used for conservative queries. At this point in the
    // process we don't know which users will have a base reg, immediate,
    // etc., so we conservatively assume they may not, making more strides
    // valid, and thus erring on the side of assuming that there might be
    // sharing.
    TargetLowering::AddrMode AM;
    AM.Scale = Scale;

    // Any pre-inc iv use?
    IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[Share];
    for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
           E = StrideUses.Users.end(); I != E; ++I) {
      bool isAddress = isAddressUse(I->getUser(), I->getOperandValToReplace());
      if (!I->isUseOfPostIncrementedValue() &&
          isLegalUse(AM, isAddress ? LSRUse::Address : LSRUse::Basic,
                     isAddress ? getAccessType(I->getUser()) : 0,
                     TLI))
        return true;
    }
  }
  return false;
}
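
// For illustration only (not from the original source): with constant strides
// 4 and 8 in the same loop, Scale here would be 8/4 == 2, so on a target
// where a reg + 2*reg address is legal, a pre-inc address use of the stride-8
// IV could be expressed in terms of the stride-4 IV, and this query would
// conservatively report that the strides might be shared.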

/// OptimizeLoopTermCond - Change the loop's terminating condition to use the
/// post-inc iv when possible.
bool
LSRInstance::OptimizeLoopTermCond(Instruction *&IVIncInsertPos) {
  SmallPtrSet<Instruction *, 4> PostIncs;

  BasicBlock *LatchBlock = L->getLoopLatch();
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BasicBlock *ExitingBlock = ExitingBlocks[i];

    // Get the terminating condition for the loop if possible. If we
    // can, we want to change it to use a post-incremented version of its
    // induction variable, to allow coalescing the live ranges for the IV into
    // one register value.

    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
    if (!TermBr)
      continue;
    // FIXME: Overly conservative, termination condition could be an 'or' etc..
    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
      continue;

    // Search IVUsesByStride to find Cond's IVUse if there is one.
    IVStrideUse *CondUse = 0;
    const SCEV *CondStride = 0;
    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
    if (!FindIVUserForCond(Cond, CondUse, CondStride))
      continue;

    // If the trip count is computed in terms of a max (due to ScalarEvolution
    // being unable to find a sufficient guard, for example), change the loop
    // comparison to use SLT or ULT instead of NE.
    // One consequence of doing this now is that it disrupts the count-down
    // optimization. That's not always a bad thing though, because in such
    // cases it may still be worthwhile to avoid a max.
    Cond = OptimizeMax(Cond, CondUse);

    // If this exiting block is the latch block, and the condition has only
    // one use inside the loop (the branch), the post-incremented value of
    // the induction variable can be used directly. A non-latch exiting
    // block needs additional checks first.
    if (ExitingBlock != LatchBlock) {
      // If this exiting block dominates the latch block, it may also use
      // the post-inc value if it won't be shared with other uses.
      // Check for dominance.
      if (!DT.dominates(ExitingBlock, LatchBlock))
        continue;
      // Check for sharing within the same stride.
      bool SameStrideSharing = false;
      IVUsersOfOneStride &StrideUses = *IU.IVUsesByStride[CondStride];
      for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
             E = StrideUses.Users.end(); I != E; ++I) {
        if (I->getUser() == Cond)
          continue;
        if (!I->isUseOfPostIncrementedValue()) {
          SameStrideSharing = true;
          break;
        }
      }
      if (SameStrideSharing)
        continue;
      // Check for sharing from a different stride.
      if (isa<SCEVConstant>(CondStride) && StrideMightBeShared(CondStride))
        continue;
    }
    if (!Cond->hasOneUse()) {
      bool HasOneUseInLoop = true;
      for (Value::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
           UI != UE; ++UI) {
        Instruction *U = cast<Instruction>(*UI);
        if (U == TermBr)
          continue;
        if (L->contains(U)) {
          HasOneUseInLoop = false;
          break;
        }
      }
      if (!HasOneUseInLoop)
        continue;
    }

    DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
                 << *Cond << '\n');

    // It's possible for the setcc instruction to be anywhere in the loop, and
    // possible for it to have multiple users. If it is not immediately before
    // the exiting block branch, move it.
    if (&*++BasicBlock::iterator(Cond) != TermBr) {
      if (Cond->hasOneUse()) { // Condition has a single use, just move it.
        Cond->moveBefore(TermBr);
      } else {
        // Otherwise, clone the terminating condition and insert into the
        // loopend.
        ICmpInst *OldCond = Cond;
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");
        ExitingBlock->getInstList().insert(TermBr, Cond);

        // Clone the IVUse, as the old use still exists!
        IU.IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
                                          CondUse->getOperandValToReplace());
        CondUse = &IU.IVUsesByStride[CondStride]->Users.back();
        TermBr->replaceUsesOfWith(OldCond, Cond);
      }
    }

    // If we get to here, we know that we can transform the setcc instruction
    // to use the post-incremented version of the IV, allowing us to coalesce
    // the live ranges for the IV correctly.
    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), CondStride));
    CondUse->setIsUseOfPostIncrementedValue(true);
    Changed = true;

    PostIncs.insert(Cond);
  }

  // Determine an insertion point for the loop induction variable increment.
  // It must dominate all the post-inc comparisons we just set up, and it must
  // dominate the loop latch edge.
  IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (SmallPtrSet<Instruction *, 4>::iterator I = PostIncs.begin(),
       E = PostIncs.end(); I != E; ++I) {
    BasicBlock *BB =
      DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
                                    (*I)->getParent());
    if (BB == (*I)->getParent())
      IVIncInsertPos = *I;
    else if (BB != IVIncInsertPos->getParent())
      IVIncInsertPos = BB->getTerminator();
  }

  return Changed;
}
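
// For illustration only (not from the original source): a sketch of the
// IR-level effect of this transformation on a simple latch-exit loop:
//
//   ; before: the exit test uses the pre-incremented IV
//   %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
//   %exitcond = icmp eq i32 %i, %n
//   %i.next = add i32 %i, 1
//
//   ; after: the exit test uses the post-incremented IV, so the live
//   ; ranges of %i and %i.next can be coalesced into one register
//   %i.next = add i32 %i, 1
//   %exitcond = icmp eq i32 %i.next, %n.plus.1
//
// where the compared-against bound is adjusted by the stride, which is what
// the setOffset(getMinusSCEV(...)) call above expresses.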

/// CountRegister - Note the given register.
void LSRInstance::CountRegister(const SCEV *Reg, uint32_t Complexity,
                                size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUses.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &BV = Pair.first->second;
  if (Pair.second) {
    BV.Index = CurrentArbitraryRegIndex++;
    BV.MaxComplexity = Complexity;
    RegSequence.push_back(Reg);
  } else {
    BV.MaxComplexity = std::max(BV.MaxComplexity, Complexity);
  }
  BV.Bits.resize(std::max(BV.Bits.size(), LUIdx + 1));
  BV.Bits.set(LUIdx);
}

/// CountRegisters - Note which registers are used by the given formula,
/// updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  uint32_t Complexity = F.getComplexity();
  if (F.ScaledReg)
    CountRegister(F.ScaledReg, Complexity, LUIdx);
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I)
    CountRegister(*I, Complexity, LUIdx);
}

/// GenerateSymbolicOffsetReuse - Generate reuse formulae using symbolic
/// offsets.
void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
                                              Formula Base) {
  // We can't add a symbolic offset if the address already contains one.
  if (Base.AM.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *G = Base.BaseRegs[i];
    GlobalValue *GV = ExtractSymbol(G, SE);
    if (G->isZero())
      continue;
    Formula F = Base;
    F.AM.BaseGV = GV;
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
      continue;
    F.BaseRegs[i] = G;
    if (LU.InsertFormula(F))
      CountRegisters(LU.Formulae.back(), LUIdx);
  }
}
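
// For illustration only (not from the original source): if a base register is
// the SCEV ((4 * %i) + @g) for some global @g, ExtractSymbol peels off @g, and
// the new formula uses @g as the address-mode BaseGV with (4 * %i) remaining
// as the base register -- letting a target fold the symbol into the address
// as reg + @g, when that is legal.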

/// GenerateICmpZeroScaledReuse - For ICmpZero, check to see if we can scale up
/// the comparison. For example, x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
                                              Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  const Type *IntTy = Base.getType();
  if (!IntTy) return;
  if (SE.getTypeSizeInBits(IntTy) > 64) return;
  IntTy = SE.getEffectiveSCEVType(IntTy);

  assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;
    Formula F = Base;

    // Check that the multiplication doesn't overflow.
    F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs * Factor;
    if ((int64_t)F.AM.BaseOffs / Factor != F.AM.BaseOffs)
      continue;

    // Check that this scale is legal.
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
      continue;

    const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));

    // Check that multiplying with each base register doesn't overflow.
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // If we make it here and it's legal, add it.
    if (LU.InsertFormula(F))
      CountRegisters(LU.Formulae.back(), LUIdx);
  next:;
  }
}
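
// For illustration only (not from the original source): with an ICmpZero use
// testing (%n - %i) == 0 and an interesting factor of 4 (say, because another
// IV in the loop strides by 4), the formula can be scaled to
// (4*%n - 4*%i) == 0, which may let the comparison share an existing stride-4
// register instead of keeping a separate stride-1 register live.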

/// GenerateFormulaeFromReplacedBaseReg - If removing the base register with
/// index i from the BaseRegs list and adding the registers in AddOps
/// to the address forms an interesting formula, pursue it.
void
LSRInstance::GenerateFormulaeFromReplacedBaseReg(
                                           LSRUse &LU,
                                           unsigned LUIdx,
                                           const Formula &Base, unsigned i,
                                           const SmallVectorImpl<const SCEV *>
                                             &AddOps) {
  if (AddOps.empty()) return;

  Formula F = Base;
  std::swap(F.BaseRegs[i], F.BaseRegs.back());
  F.BaseRegs.pop_back();
  for (SmallVectorImpl<const SCEV *>::const_iterator I = AddOps.begin(),
       E = AddOps.end(); I != E; ++I)
    F.BaseRegs.push_back(*I);
  F.AM.HasBaseReg = !F.BaseRegs.empty();
  if (LU.InsertFormula(F)) {
    CountRegisters(LU.Formulae.back(), LUIdx);
    // Recurse.
    GenerateReassociationReuse(LU, LUIdx, LU.Formulae.back());
  }
}

/// GenerateReassociationReuse - Split out subexpressions from adds and
/// the bases of addrecs.
void LSRInstance::GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
                                             Formula Base) {
  SmallVector<const SCEV *, 8> AddOps;
  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *BaseReg = Base.BaseRegs[i];
    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BaseReg)) {
      for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
           J != JE; ++J) {
        // Don't pull a constant into a register if the constant could be
        // folded into an immediate field.
        if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE)) continue;
        SmallVector<const SCEV *, 8> InnerAddOps;
        for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
          if (K != J)
            InnerAddOps.push_back(*K);
        // Splitting a 2-operand add both ways is redundant. Pruning this
        // now saves compile time.
        if (InnerAddOps.size() < 2 && next(J) == JE)
          continue;
        AddOps.push_back(*J);
        const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
        AddOps.push_back(InnerAdd);
        GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
        AddOps.clear();
      }
    } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BaseReg)) {
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(AR->getStart())) {
        for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
             J != JE; ++J) {
          // Don't pull a constant into a register if the constant could be
          // folded into an immediate field.
          if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE))
            continue;
          SmallVector<const SCEV *, 8> InnerAddOps;
          for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
            if (K != J)
              InnerAddOps.push_back(*K);
          AddOps.push_back(*J);
          const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
          AddOps.push_back(SE.getAddRecExpr(InnerAdd,
                                            AR->getStepRecurrence(SE),
                                            AR->getLoop()));
          GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
          AddOps.clear();
        }
      } else if (!isAlwaysFoldable(AR->getStart(), Base.BaseRegs.size() > 1,
                                   LU.Kind, LU.AccessTy,
                                   TLI, SE)) {
        AddOps.push_back(AR->getStart());
        AddOps.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0,
                                                        BaseReg->getType()),
                                          AR->getStepRecurrence(SE),
                                          AR->getLoop()));
        GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
        AddOps.clear();
      }
    }
  }
}
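
// For illustration only (not from the original source): a base register whose
// SCEV is (%a + %b + %c) can be split into the pair { %a, (%b + %c) }, or
// { %b, (%a + %c) }, and so on; for an addrec like {(%a + %b),+,4}, the start
// can similarly be split so that %a becomes its own register alongside
// {%b,+,4}. Each resulting formula is then re-examined recursively.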

/// GenerateCombinationReuse - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
void LSRInstance::GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
                                           Formula Base) {
  if (Base.BaseRegs.size() <= 1) return;

  Formula F = Base;
  F.BaseRegs.clear();
  SmallVector<const SCEV *, 4> Ops;
  for (SmallVectorImpl<const SCEV *>::const_iterator
       I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
    const SCEV *BaseReg = *I;
    if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
        !BaseReg->hasComputableLoopEvolution(L))
      Ops.push_back(BaseReg);
    else
      F.BaseRegs.push_back(BaseReg);
  }
  if (Ops.size() > 1) {
    F.BaseRegs.push_back(SE.getAddExpr(Ops));
    if (LU.InsertFormula(F))
      CountRegisters(LU.Formulae.back(), LUIdx);
  }
}
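
// For illustration only (not from the original source): if a formula's base
// registers include two loop-invariant values %x and %y (both available above
// the loop and not evolving inside it), this produces an alternative formula
// that uses the single combined register (%x + %y), computed once outside
// the loop, reducing in-loop register pressure by one.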

/// GenerateScaledReuse - Generate stride factor reuse formulae by making
/// use of scaled-offset address modes, for example.
void LSRInstance::GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
                                      Formula Base) {
  // Determine the integer type for the base formula.
  const Type *IntTy = Base.getType();
  if (!IntTy) return;
  IntTy = SE.getEffectiveSCEVType(IntTy);

  // Check each interesting stride.
  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;

    // If this Formula already has a scaled register, we can't add another one.
    if (Base.AM.Scale != 0)
      continue;
    Formula F = Base;
    F.AM.Scale = Factor;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI)) {
      // As a special-case, handle special out-of-loop Basic users specially.
      // TODO: Reconsider this special case.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(F.AM, LSRUse::Special, LU.AccessTy, TLI) &&
          !L->contains(LU.UserInst))
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For each addrec base reg, apply the scale, if possible.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
      if (const SCEVAddRecExpr *AR =
            dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
        const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
          // TODO: This could be optimized to avoid all the copying.
          Formula NewF = F;
          NewF.ScaledReg = Quotient;
          std::swap(NewF.BaseRegs[i], NewF.BaseRegs.back());
          NewF.BaseRegs.pop_back();
          NewF.AM.HasBaseReg = !NewF.BaseRegs.empty();
          if (LU.InsertFormula(NewF))
            CountRegisters(LU.Formulae.back(), LUIdx);
        }
      }
  }
}
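
// For illustration only (not from the original source): an address use whose
// base register is {0,+,8} can instead be expressed with AM.Scale = 8 and
// ScaledReg = {0,+,1}, i.e. as base + 8*i on targets with scaled-index
// addressing -- letting it share the canonical {0,+,1} register with other
// uses instead of needing a dedicated stride-8 register.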

/// GenerateTruncateReuse - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
                                        Formula Base) {
  // This requires TargetLowering to tell us which truncates are free.
  if (!TLI) return;

  // Don't attempt to truncate symbolic values.
  if (Base.AM.BaseGV) return;

  // Determine the integer type for the base formula.
  const Type *DstTy = Base.getType();
  if (!DstTy) return;
  DstTy = SE.getEffectiveSCEVType(DstTy);

  for (SmallSetVector<const Type *, 4>::const_iterator
       I = Types.begin(), E = Types.end(); I != E; ++I) {
    const Type *SrcTy = *I;
    if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
      Formula F = Base;
      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
      for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
           JE = F.BaseRegs.end(); J != JE; ++J)
        *J = SE.getAnyExtendExpr(*J, SrcTy);
      if (LU.InsertFormula(F))
        CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }
}
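
// For illustration only (not from the original source): on a target where
// truncating i64 to i32 is free (x86-64, for example), an i32 use with base
// register {0,+,1}<i32> can be rewritten in terms of the wider {0,+,1}<i64>
// register; one i64 IV then serves both the i64 and i32 uses, with the i32
// use simply reading the low half.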

namespace {

/// WorkItem - Helper class for GenerateConstantOffsetReuse. It's used to
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
  LSRUse *LU;
  size_t LUIdx;
  int64_t Imm;
  const SCEV *OrigReg;

  WorkItem(LSRUse *U, size_t LI, int64_t I, const SCEV *R)
    : LU(U), LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

void WorkItem::print(raw_ostream &OS) const {
  OS << "in use ";
  LU->print(OS);
  OS << " (at index " << LUIdx << "), add offset " << Imm
     << " and compensate by adjusting references to " << *OrigReg << "\n";
}

void WorkItem::dump() const {
  print(errs()); errs() << '\n';
}

}

/// GenerateConstantOffsetReuse - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
void LSRInstance::GenerateConstantOffsetReuse() {
  // Group the registers by their value without any added constant offset.
  typedef std::map<int64_t, const SCEV *> ImmMapTy;
  typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
  RegMapTy Map;
  SmallVector<const SCEV *, 8> Sequence;
  for (SmallVectorImpl<const SCEV *>::iterator I = RegSequence.begin(),
       E = RegSequence.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    int64_t Imm = ExtractImmediate(Reg, SE);
    std::pair<RegMapTy::iterator, bool> Pair =
      Map.insert(std::make_pair(Reg, ImmMapTy()));
    if (Pair.second)
      Sequence.push_back(Reg);
    Pair.first->second.insert(std::make_pair(Imm, *I));
  }

  // Insert an artificial expression at offset 0 (if there isn't one already),
  // as this may lead to more reuse opportunities.
  for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
       E = Sequence.end(); I != E; ++I)
    Map.find(*I)->second.insert(ImmMapTy::value_type(0, 0));

  // Now examine each set of registers with the same base value. Build up
  // a list of work to do and do the work in a separate step so that we're
  // not adding formulae and register counts while we're searching.
  SmallVector<WorkItem, 32> WorkItems;
  for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
       E = Sequence.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    const ImmMapTy &Imms = Map.find(Reg)->second;
    // Examine each offset.
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         J != JE; ++J) {
      const SCEV *OrigReg = J->second;
      // Skip the artificial register at offset 0.
      if (!OrigReg) continue;

      int64_t JImm = J->first;
      const SmallBitVector &Bits = RegUses.find(OrigReg)->second.Bits;

      // Examine each other offset associated with the same register. This is
      // quadratic in the number of registers with the same base, but it's
      // uncommon for this to be a large number.
      for (ImmMapTy::const_iterator M = Imms.begin(); M != JE; ++M) {
        if (M == J) continue;

        // Compute the difference between the two.
        int64_t Imm = (uint64_t)JImm - M->first;
        for (int LUIdx = Bits.find_first(); LUIdx != -1;
             LUIdx = Bits.find_next(LUIdx))
          // Make a memo of this use, offset, and register tuple.
          WorkItems.push_back(WorkItem(&Uses[LUIdx], LUIdx, Imm, OrigReg));
      }
    }
  }

  // Now iterate through the worklist and add new formulae.
  for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
       E = WorkItems.end(); I != E; ++I) {
    const WorkItem &WI = *I;
    LSRUse &LU = *WI.LU;
    size_t LUIdx = WI.LUIdx;
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
    const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy,
                                                      -(uint64_t)Imm));

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      // Use the immediate in the scaled register.
      if (F.ScaledReg == OrigReg) {
        int64_t Offs = (uint64_t)F.AM.BaseOffs +
                       Imm * (uint64_t)F.AM.Scale;
        // Don't create 50 + reg(-50).
        if (F.referencesReg(SE.getSCEV(
                   ConstantInt::get(IntTy, -(uint64_t)Offs))))
          continue;
        Formula NewF = F;
        NewF.AM.BaseOffs = Offs;
        if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
          continue;
        const SCEV *Diff = SE.getAddExpr(NegImmS, NewF.ScaledReg);
        if (Diff->isZero()) continue;
        NewF.ScaledReg = Diff;
        if (LU.InsertFormula(NewF))
          CountRegisters(LU.Formulae.back(), LUIdx);
      }
      // Use the immediate in a base register.
      for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
        const SCEV *BaseReg = F.BaseRegs[N];
        if (BaseReg != OrigReg)
          continue;
        Formula NewF = F;
        NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
        if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
          continue;
        const SCEV *Diff = SE.getAddExpr(NegImmS, BaseReg);
        if (Diff->isZero()) continue;
        // Don't create 50 + reg(-50).
        if (Diff ==
            SE.getSCEV(ConstantInt::get(IntTy,
                                        -(uint64_t)NewF.AM.BaseOffs)))
          continue;
        NewF.BaseRegs[N] = Diff;
        if (LU.InsertFormula(NewF))
          CountRegisters(LU.Formulae.back(), LUIdx);
      }
    }
  }
}
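
// For illustration only (not from the original source): if one use needs
// reg({0,+,4}) and another needs reg({8,+,4}), the two registers differ by
// the constant 8, so the second use can instead be given the formula
// reg({0,+,4}) + 8, folding the difference into the address-mode immediate
// and letting both uses share a single register.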

/// GenerateAllReuseFormulae - Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsetReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScaledReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociationReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinationReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScaledReuse(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncateReuse(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateConstantOffsetReuse();
}

/// GenerateLoopInvariantRegisterUses - Check for other uses of loop-invariant
/// values which we're tracking. These other uses will pin these values in
/// registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::GenerateLoopInvariantRegisterUses() {
  for (size_t i = 0, e = RegSequence.size(); i != e; ++i)
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(RegSequence[i])) {
      const Value *V = U->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V))
        if (L->contains(Inst)) continue;
      for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
           UI != UE; ++UI) {
        const Instruction *UserInst = dyn_cast<Instruction>(*UI);
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType()) &&
            !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
          continue;
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !UI.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
            continue;
        }

        LSRUse &LU = getNewUse();
        LU.UserInst = const_cast<Instruction *>(UserInst);
        LU.OperandValToReplace = UI.getUse();
        LU.InsertSupplementalFormula(U);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
      }
    }
}

#ifndef NDEBUG

static void debug_winner(SmallVector<LSRUse, 16> const &Uses) {
  dbgs() << "LSR has selected formulae for each use:\n";
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    const LSRUse &LU = *I;
    dbgs() << "  ";
    LU.print(dbgs());
    dbgs() << '\n';
    dbgs() << "    ";
    LU.Formulae.front().print(dbgs());
    dbgs() << "\n";
  }
}

#endif

LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
  : IU(P->getAnalysis<IVUsers>()),
    SE(P->getAnalysis<ScalarEvolution>()),
    DT(P->getAnalysis<DominatorTree>()),
    TLI(tli), L(l), Changed(false), CurrentArbitraryRegIndex(0) {

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm()) return;

  // If there's no interesting work to be done, bail early.
  if (IU.IVUsesByStride.empty()) return;

  DEBUG(dbgs() << "\nLSR on loop ";
        WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
        dbgs() << ":\n");

  // Sort the StrideOrder so we process larger strides first.
  std::stable_sort(IU.StrideOrder.begin(), IU.StrideOrder.end(),
                   StrideCompare(SE));

  // If the IV is used in an int-to-float cast inside the loop, try to
  // eliminate the cast operation.
  OptimizeShadowIV();

  // Change the loop's terminating condition to use the post-inc iv when
  // possible.
  Instruction *IVIncInsertPos;
  Changed |= OptimizeLoopTermCond(IVIncInsertPos);

  for (SmallVectorImpl<const SCEV *>::const_iterator SIter =
       IU.StrideOrder.begin(), SEnd = IU.StrideOrder.end();
       SIter != SEnd; ++SIter) {
    const SCEV *Stride = *SIter;

    // Collect interesting types.
    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));

    // Collect interesting factors.
    for (SmallVectorImpl<const SCEV *>::const_iterator NewStrideIter =
         SIter + 1; NewStrideIter != SEnd; ++NewStrideIter) {
      const SCEV *OldStride = Stride;
      const SCEV *NewStride = *NewStrideIter;

      if (SE.getTypeSizeInBits(OldStride->getType()) !=
          SE.getTypeSizeInBits(NewStride->getType())) {
        if (SE.getTypeSizeInBits(OldStride->getType()) >
            SE.getTypeSizeInBits(NewStride->getType()))
          NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
        else
          OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
      }
      if (const SCEVConstant *Factor =
            dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
                                                   SE, true)))
        if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
          Factors.insert(Factor->getValue()->getValue().getSExtValue());
    }

    std::map<const SCEV *, IVUsersOfOneStride *>::const_iterator SI =
      IU.IVUsesByStride.find(Stride);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
    for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; ++UI) {
      // Record the uses.
      LSRUse &LU = getNewUse();
      LU.UserInst = UI->getUser();
      LU.OperandValToReplace = UI->getOperandValToReplace();
      if (isAddressUse(LU.UserInst, LU.OperandValToReplace)) {
        LU.Kind = LSRUse::Address;
        LU.AccessTy = getAccessType(LU.UserInst);
      }
      if (UI->isUseOfPostIncrementedValue())
        LU.PostIncLoop = L;

      const SCEV *S = IU.getCanonicalExpr(*UI);

      // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
      // (N - i == 0), and this allows (N - i) to be the expression that we
      // work with rather than just N or i, so we can consider the register
      // requirements for both N and i at the same time. Limiting this code
      // to equality icmps is not a problem because all interesting loops
      // use equality icmps, thanks to IndVarSimplify.
      if (ICmpInst *CI = dyn_cast<ICmpInst>(LU.UserInst))
        if (CI->isEquality()) {
          // Swap the operands if needed to put the OperandValToReplace on
          // the left, for consistency.
          Value *NV = CI->getOperand(1);
          if (NV == LU.OperandValToReplace) {
            CI->setOperand(1, CI->getOperand(0));
            CI->setOperand(0, NV);
          }

          // x == y --> x - y == 0
          const SCEV *N = SE.getSCEV(NV);
          if (N->isLoopInvariant(L)) {
            LU.Kind = LSRUse::ICmpZero;
            S = SE.getMinusSCEV(N, S);
          }

          // -1 and the negations of all interesting strides (except the
          // negation of -1) are now also interesting.
          for (size_t i = 0, e = Factors.size(); i != e; ++i)
            if (Factors[i] != -1)
              Factors.insert(-(uint64_t)Factors[i]);
          Factors.insert(-1);
        }

      // Ok, now enumerate all the different formulae we can find to compute
      // the value for this expression.
      LU.InsertInitialFormula(S, L, SE, DT);
      CountRegisters(LU.Formulae.back(), Uses.size() - 1);
    }
  }

  // If all uses use the same type, don't bother looking for truncation-based
  // reuse.
  if (Types.size() == 1)
    Types.clear();

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  // If there are any uses of registers that we're tracking that have escaped
  // IVUsers' attention, add trivial uses for them, so that the register
  // voting process takes them into consideration.
  GenerateLoopInvariantRegisterUses();

  // Sort the formulae. TODO: This is redundantly sorted below.
  for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), E = Uses.end();
       I != E; ++I) {
    LSRUse &LU = *I;
    std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
                     ComplexitySorter());
  }

  // Ok, we've now collected all the uses and noted their register uses. The
  // next step is to start looking at register reuse possibilities.
  DEBUG(print(dbgs()); dbgs() << '\n');

  // Start by assuming we'll assign each use its own register. This is
  // sometimes called "full" strength reduction, or "superhero" mode.
  // Sometimes this is the best solution, but if there are opportunities for
  // reuse we may find a better solution.
  Score CurScore;
  CurScore.RateInitial(Uses, L, SE);

  // Create a sorted list of registers with those with the most uses appearing
  // earlier in the list. We'll visit them first, as they're the most likely
  // to represent profitable reuse opportunities.
  SmallVector<RegCount, 8> RegOrder;
  for (SmallVectorImpl<const SCEV *>::const_iterator I =
       RegSequence.begin(), E = RegSequence.end(); I != E; ++I)
    RegOrder.push_back(RegCount(*I, RegUses.find(*I)->second));
  std::stable_sort(RegOrder.begin(), RegOrder.end());

  // Visit each register. Determine which ones represent profitable reuse
  // opportunities and remember them.
  // TODO: Extract this code into a function.
  for (SmallVectorImpl<RegCount>::const_iterator I = RegOrder.begin(),
       E = RegOrder.end(); I != E; ++I) {
    const SCEV *Reg = I->Reg;
    const SmallBitVector &Bits = I->Sort.Bits;

    // Registers with only one use don't represent reuse opportunities, so
    // when we get there, we're done.
    if (Bits.count() <= 1) break;

    DEBUG(dbgs() << "Reg " << *Reg << ": ";
          I->Sort.print(dbgs());
          dbgs() << '\n');

    // Determine the total number of registers that will be needed if we make
    // use of the reuse opportunity represented by the current register.
    Score NewScore;
    NewScore.Rate(Reg, Bits, Uses, L, SE);

    // Now decide whether this register's reuse opportunity is an overall win.
    // Currently the decision is heavily skewed for register pressure.
    if (!(NewScore < CurScore)) {
      continue;
    }

    // Ok, use this opportunity.
    DEBUG(dbgs() << "This candidate has been accepted.\n");
    CurScore = NewScore;

    // Now that we've selected a new reuse opportunity, delete formulae that
    // do not participate in that opportunity.
    for (int j = Bits.find_first(); j != -1; j = Bits.find_next(j)) {
      LSRUse &LU = Uses[j];
      for (unsigned k = 0, h = LU.Formulae.size(); k != h; ++k) {
        Formula &F = LU.Formulae[k];
        if (!F.referencesReg(Reg)) {
          std::swap(LU.Formulae[k], LU.Formulae.back());
          LU.Formulae.pop_back();
          --k; --h;
        }
      }
      // Also re-sort the list to put the formulae with the fewest registers
      // at the front.
      // TODO: Do this earlier, we don't need it each time.
      std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
                       ComplexitySorter());
    }
  }

  // Ok, we've now made all our decisions. The first formula for each use
  // will be used.
  DEBUG(dbgs() << "Concluding, we need "; CurScore.print(dbgs());
        dbgs() << ".\n";
        debug_winner(Uses));

  // Free memory no longer needed.
  RegOrder.clear();
  Factors.clear();
  Types.clear();
  RegUses.clear();
  RegSequence.clear();

  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakVH, 16> DeadInsts;

  SCEVExpander Rewriter(SE);
  Rewriter.disableCanonicalMode();
  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Expand the new value definitions and update the users.
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    // Formulae should be legal.
    DEBUG(for (SmallVectorImpl<Formula>::const_iterator J = I->Formulae.begin(),
               JE = I->Formulae.end(); J != JE; ++J)
            assert(isLegalUse(J->AM, I->Kind, I->AccessTy, TLI) &&
                   "Illegal formula generated!"));

    // Expand the new code and update the user.
    I->Rewrite(L, Rewriter, DeadInsts, SE, DT, P);
    Changed = true;
  }

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}

void LSRInstance::print(raw_ostream &OS) const {
  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (SmallSetVector<int64_t, 4>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << *I;
  }

  for (SmallSetVector<const Type *, 4>::const_iterator
       I = Types.begin(), E = Types.end(); I != E; ++I) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << **I << ')';
  }
  OS << '\n';

  OS << "LSR is examining the following uses, and candidate formulae:\n";
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    const LSRUse &LU = *I;
    OS << "  ";
    LU.print(OS);
    OS << '\n';
    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
         JE = LU.Formulae.end(); J != JE; ++J) {
      OS << "    ";
      J->print(OS);
      OS << "\n";
    }
  }
}

void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}

namespace {

class LoopStrengthReduce : public LoopPass {
  /// TLI - Keep a pointer to a TargetLowering to consult for determining
  /// transformation profitability.
  const TargetLowering *const TLI;

public:
  static char ID; // Pass ID, replacement for typeid
  explicit LoopStrengthReduce(const TargetLowering *tli = NULL);

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM);
  void getAnalysisUsage(AnalysisUsage &AU) const;
};

}

char LoopStrengthReduce::ID = 0;
static RegisterPass<LoopStrengthReduce>
X("loop-reduce", "Loop Strength Reduction");

Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
  return new LoopStrengthReduce(TLI);
}

LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
  : LoopPass(&ID), TLI(tli) {}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addPreservedID(LoopSimplifyID);
  AU.addPreserved<LoopInfo>();
  AU.addPreserved("domfrontier");

  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<DominatorTree>();
  AU.addPreserved<DominatorTree>();
  AU.addRequired<ScalarEvolution>();
  AU.addPreserved<ScalarEvolution>();
  AU.addRequired<IVUsers>();
  AU.addPreserved<IVUsers>();
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  bool Changed = false;

  // Run the main LSR transformation.
  Changed |= LSRInstance(TLI, L, this).getChanged();

  // At this point, it is worth checking to see if any recurrence PHIs are also
  // dead, so that we can remove them as well.
  Changed |= DeleteDeadPHIs(L->getHeader());

  return Changed;
}