Blame - polly/lib/IndVarSimplify.cpp - toolchain/llvm-project

blob: 5ef95ba1a7a42fd45a213feb2b5271727b07fd9b [file] [log] [blame]

Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	1	//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This transformation analyzes and transforms the induction variables (and
				11	// computations derived from them) into simpler forms suitable for subsequent
				12	// analysis and transformation.
				13	//
				14	// If the trip count of a loop is computable, this pass also makes the following
				15	// changes:
				16	// 1. The exit condition for the loop is canonicalized to compare the
				17	// induction value against the exit value. This turns loops like:
				18	// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
				19	// 2. Any use outside of the loop of an expression derived from the indvar
				20	// is changed to compute the derived value outside of the loop, eliminating
				21	// the dependence on the exit value of the induction variable. If the only
				22	// purpose of the loop is to compute the exit value of some derived
				23	// expression, this transformation will make the loop dead.
				24	//
				25	//===----------------------------------------------------------------------===//
				26
				27	#define DEBUG_TYPE "indvars"
				28
				29	#include "polly/LinkAllPasses.h"
				30
				31	#include "llvm/Transforms/Scalar.h"
				32	#include "llvm/BasicBlock.h"
				33	#include "llvm/Constants.h"
				34	#include "llvm/Instructions.h"
				35	#include "llvm/IntrinsicInst.h"
				36	#include "llvm/LLVMContext.h"
				37	#include "llvm/Type.h"
				38	#include "llvm/Analysis/Dominators.h"
				39	#include "llvm/Analysis/IVUsers.h"
				40	#include "llvm/Analysis/ScalarEvolutionExpander.h"
				41	#include "llvm/Analysis/LoopInfo.h"
				42	#include "llvm/Analysis/LoopPass.h"
				43	#include "llvm/Support/CFG.h"
				44	#include "llvm/Support/CommandLine.h"
				45	#include "llvm/Support/Debug.h"
				46	#include "llvm/Support/raw_ostream.h"
				47	#include "llvm/Transforms/Utils/Local.h"
				48	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
				49	#include "llvm/Transforms/Utils/SimplifyIndVar.h"
				50	#include "llvm/Target/TargetData.h"
				51	#include "llvm/ADT/DenseMap.h"
				52	#include "llvm/ADT/SmallVector.h"
				53	#include "llvm/ADT/Statistic.h"
				54	using namespace llvm;
				55
// Counters for the rewrites performed by this pass. Within this part of the
// file NumReplaced is bumped by RewriteLoopExitValues and NumRemoved by
// RewriteIVExpressions; the remaining counters are used further down.
				56	STATISTIC(NumRemoved , "Number of aux indvars removed");
				57	STATISTIC(NumWidened , "Number of indvars widened");
				58	STATISTIC(NumInserted , "Number of canonical indvars added");
				59	STATISTIC(NumReplaced , "Number of exit values replaced");
				60	STATISTIC(NumLFTR , "Number of loop exit tests replaced");
				61	STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
				62	STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
				63
// Compile-time switches. EnableIVRewrite decides whether getAnalysisUsage()
// requires and preserves IVUsers. VerifyIndvars is not read in the portion of
// the file shown here.
				64	static const bool EnableIVRewrite = true;
				65	static const bool VerifyIndvars = false;
				66
				67	namespace {
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	68	class PollyIndVarSimplify : public LoopPass {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	69	IVUsers *IU;
				70	LoopInfo *LI;
				71	ScalarEvolution *SE;
				72	DominatorTree *DT;
				73	TargetData *TD;
				74
				75	SmallVector<WeakVH, 16> DeadInsts;
				76	bool Changed;
				77	public:
				78
				79	static char ID; // Pass identification, replacement for typeid
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	80	PollyIndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	81	Changed(false) {
				82	initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
				83	}
				84
				85	virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
				86
				87	virtual void getAnalysisUsage(AnalysisUsage &AU) const {
				88	AU.addRequired<DominatorTree>();
				89	AU.addRequired<LoopInfo>();
				90	AU.addRequired<ScalarEvolution>();
				91	AU.addRequiredID(LoopSimplifyID);
				92	AU.addRequiredID(LCSSAID);
				93	if (EnableIVRewrite)
				94	AU.addRequired<IVUsers>();
				95	AU.addPreserved<ScalarEvolution>();
				96	AU.addPreservedID(LoopSimplifyID);
				97	AU.addPreservedID(LCSSAID);
				98	if (EnableIVRewrite)
				99	AU.addPreserved<IVUsers>();
				100	AU.setPreservesCFG();
				101	}
				102
				103	private:
				104	virtual void releaseMemory() {
				105	DeadInsts.clear();
				106	}
				107
				108	bool isValidRewrite(Value FromVal, Value ToVal);
				109
				110	void HandleFloatingPointIV(Loop L, PHINode PH);
				111	void RewriteNonIntegerIVs(Loop *L);
				112
				113	void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
				114
				115	void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
				116
				117	void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
				118
				119	Value LinearFunctionTestReplace(Loop L, const SCEV *BackedgeTakenCount,
				120	PHINode *IndVar, SCEVExpander &Rewriter);
				121
				122	void SinkUnusedInvariants(Loop *L);
				123	};
				124	}
				125
// Definition of the pass identifier handed to LoopPass(ID) by the constructor.
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	126	char PollyIndVarSimplify::ID = 0;
// Pass registration under the command-line name "polly-indvars". The listed
// dependencies mirror the analyses requested in getAnalysisUsage().
				127	INITIALIZE_PASS_BEGIN(PollyIndVarSimplify, "polly-indvars",
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	128	"Induction Variable Simplification (Polly version)", false,
				129	false)
				130	INITIALIZE_PASS_DEPENDENCY(DominatorTree)
				131	INITIALIZE_PASS_DEPENDENCY(LoopInfo)
				132	INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
				133	INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
				134	INITIALIZE_PASS_DEPENDENCY(LCSSA)
				135	INITIALIZE_PASS_DEPENDENCY(IVUsers)
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	136	INITIALIZE_PASS_END(PollyIndVarSimplify, "polly-indvars",
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	137	"Induction Variable Simplification (Polly version)", false,
				138	false)
				139
				140	Pass *polly::createIndVarSimplifyPass() {
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	141	return new PollyIndVarSimplify();
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	142	}
				143
				144	/// isValidRewrite - Return true if the SCEV expansion generated by the
				145	/// rewriter can replace the original value. SCEV guarantees that it
				146	/// produces the same value, but the way it is produced may be illegal IR.
				147	/// Ideally, this function will only be called for verification.
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	148	bool PollyIndVarSimplify::isValidRewrite(Value FromVal, Value ToVal) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	149	// If an SCEV expression subsumed multiple pointers, its expansion could
				150	// reassociate the GEP changing the base pointer. This is illegal because the
				151	// final address produced by a GEP chain must be inbounds relative to its
				152	// underlying object. Otherwise basic alias analysis, among other things,
				153	// could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
				154	// producing an expression involving multiple pointers. Until then, we must
				155	// bail out here.
				156	//
				157	// Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
				158	// because it understands lcssa phis while SCEV does not.
				159	Value *FromPtr = FromVal;
				160	Value *ToPtr = ToVal;
				161	if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
				162	FromPtr = GEP->getPointerOperand();
				163	}
				164	if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
				165	ToPtr = GEP->getPointerOperand();
				166	}
				167	if (FromPtr != FromVal \|\| ToPtr != ToVal) {
				168	// Quickly check the common case
				169	if (FromPtr == ToPtr)
				170	return true;
				171
				172	// SCEV may have rewritten an expression that produces the GEP's pointer
				173	// operand. That's ok as long as the pointer operand has the same base
				174	// pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
				175	// base of a recurrence. This handles the case in which SCEV expansion
				176	// converts a pointer type recurrence into a nonrecurrent pointer base
				177	// indexed by an integer recurrence.
				178
				179	// If the GEP base pointer is a vector of pointers, abort.
				180	if (!FromPtr->getType()->isPointerTy() \|\| !ToPtr->getType()->isPointerTy())
				181	return false;
				182
				183	const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
				184	const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
				185	if (FromBase == ToBase)
				186	return true;
				187
				188	DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
				189	<< FromBase << " != " << ToBase << "\n");
				190
				191	return false;
				192	}
				193	return true;
				194	}
				195
				196	/// Determine the insertion point for this user. By default, insert immediately
				197	/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
				198	/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
				199	/// common dominator for the incoming blocks.
				200	static Instruction getInsertPointForUses(Instruction User, Value *Def,
				201	DominatorTree *DT) {
				202	PHINode *PHI = dyn_cast<PHINode>(User);
				203	if (!PHI)
				204	return User;
				205
				206	Instruction *InsertPt = 0;
				207	for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
				208	if (PHI->getIncomingValue(i) != Def)
				209	continue;
				210
				211	BasicBlock *InsertBB = PHI->getIncomingBlock(i);
				212	if (!InsertPt) {
				213	InsertPt = InsertBB->getTerminator();
				214	continue;
				215	}
				216	InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
				217	InsertPt = InsertBB->getTerminator();
				218	}
				219	assert(InsertPt && "Missing phi operand");
				220	assert((!isa<Instruction>(Def) \|\|
				221	DT->dominates(cast<Instruction>(Def), InsertPt)) &&
				222	"def does not dominate all uses");
				223	return InsertPt;
				224	}
				225
				226	//===----------------------------------------------------------------------===//
				227	// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
				228	//===----------------------------------------------------------------------===//
				229
				230	/// ConvertToSInt - Convert APF to an integer, if possible.
				231	static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
				232	bool isExact = false;
				233	if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
				234	return false;
				235	// See if we can convert this to an int64_t
				236	uint64_t UIntVal;
				237	if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
				238	&isExact) != APFloat::opOK \|\| !isExact)
				239	return false;
				240	IntVal = UIntVal;
				241	return true;
				242	}
				243
				244	/// HandleFloatingPointIV - If the loop has floating induction variable
				245	/// then insert corresponding integer induction variable if possible.
				246	/// For example,
				247	/// for(double i = 0; i < 10000; ++i)
				248	/// bar(i)
				249	/// is converted into
				250	/// for(int i = 0; i < 10000; ++i)
				251	/// bar((double)i);
				252	///
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	253	void PollyIndVarSimplify::HandleFloatingPointIV(Loop L, PHINode PN) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	254	unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
				255	unsigned BackEdge = IncomingEdge^1;
				256
				257	// Check incoming value.
				258	ConstantFP *InitValueVal =
				259	dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
				260
				261	int64_t InitValue;
				262	if (!InitValueVal \|\| !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
				263	return;
				264
				265	// Check IV increment. Reject this PN if increment operation is not
				266	// an add or increment value can not be represented by an integer.
				267	BinaryOperator *Incr =
				268	dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
				269	if (Incr == 0 \|\| Incr->getOpcode() != Instruction::FAdd) return;
				270
				271	// If this is not an add of the PHI with a constantfp, or if the constant fp
				272	// is not an integer, bail out.
				273	ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
				274	int64_t IncValue;
				275	if (IncValueVal == 0 \|\| Incr->getOperand(0) != PN \|\|
				276	!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
				277	return;
				278
				279	// Check Incr uses. One user is PN and the other user is an exit condition
				280	// used by the conditional terminator.
				281	Value::use_iterator IncrUse = Incr->use_begin();
				282	Instruction U1 = cast<Instruction>(IncrUse++);
				283	if (IncrUse == Incr->use_end()) return;
				284	Instruction U2 = cast<Instruction>(IncrUse++);
				285	if (IncrUse != Incr->use_end()) return;
				286
				287	// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
				288	// only used by a branch, we can't transform it.
				289	FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
				290	if (!Compare)
				291	Compare = dyn_cast<FCmpInst>(U2);
				292	if (Compare == 0 \|\| !Compare->hasOneUse() \|\|
				293	!isa<BranchInst>(Compare->use_back()))
				294	return;
				295
				296	BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
				297
				298	// We need to verify that the branch actually controls the iteration count
				299	// of the loop. If not, the new IV can overflow and no one will notice.
				300	// The branch block must be in the loop and one of the successors must be out
				301	// of the loop.
				302	assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
				303	if (!L->contains(TheBr->getParent()) \|\|
				304	(L->contains(TheBr->getSuccessor(0)) &&
				305	L->contains(TheBr->getSuccessor(1))))
				306	return;
				307
				308
				309	// If it isn't a comparison with an integer-as-fp (the exit value), we can't
				310	// transform it.
				311	ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
				312	int64_t ExitValue;
				313	if (ExitValueVal == 0 \|\|
				314	!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
				315	return;
				316
				317	// Find new predicate for integer comparison.
				318	CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
				319	switch (Compare->getPredicate()) {
				320	default: return; // Unknown comparison.
				321	case CmpInst::FCMP_OEQ:
				322	case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
				323	case CmpInst::FCMP_ONE:
				324	case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
				325	case CmpInst::FCMP_OGT:
				326	case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
				327	case CmpInst::FCMP_OGE:
				328	case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
				329	case CmpInst::FCMP_OLT:
				330	case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
				331	case CmpInst::FCMP_OLE:
				332	case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
				333	}
				334
				335	// We convert the floating point induction variable to a signed i32 value if
				336	// we can. This is only safe if the comparison will not overflow in a way
				337	// that won't be trapped by the integer equivalent operations. Check for this
				338	// now.
				339	// TODO: We could use i64 if it is native and the range requires it.
				340
				341	// The start/stride/exit values must all fit in signed i32.
				342	if (!isInt<32>(InitValue) \|\| !isInt<32>(IncValue) \|\| !isInt<32>(ExitValue))
				343	return;
				344
				345	// If not actually striding (add x, 0.0), avoid touching the code.
				346	if (IncValue == 0)
				347	return;
				348
				349	// Positive and negative strides have different safety conditions.
				350	if (IncValue > 0) {
				351	// If we have a positive stride, we require the init to be less than the
				352	// exit value.
				353	if (InitValue >= ExitValue)
				354	return;
				355
				356	uint32_t Range = uint32_t(ExitValue-InitValue);
				357	// Check for infinite loop, either:
				358	// while (i <= Exit) or until (i > Exit)
				359	if (NewPred == CmpInst::ICMP_SLE \|\| NewPred == CmpInst::ICMP_SGT) {
				360	if (++Range == 0) return; // Range overflows.
				361	}
				362
				363	unsigned Leftover = Range % uint32_t(IncValue);
				364
				365	// If this is an equality comparison, we require that the strided value
				366	// exactly land on the exit value, otherwise the IV condition will wrap
				367	// around and do things the fp IV wouldn't.
				368	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
				369	Leftover != 0)
				370	return;
				371
				372	// If the stride would wrap around the i32 before exiting, we can't
				373	// transform the IV.
				374	if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
				375	return;
				376
				377	} else {
				378	// If we have a negative stride, we require the init to be greater than the
				379	// exit value.
				380	if (InitValue <= ExitValue)
				381	return;
				382
				383	uint32_t Range = uint32_t(InitValue-ExitValue);
				384	// Check for infinite loop, either:
				385	// while (i >= Exit) or until (i < Exit)
				386	if (NewPred == CmpInst::ICMP_SGE \|\| NewPred == CmpInst::ICMP_SLT) {
				387	if (++Range == 0) return; // Range overflows.
				388	}
				389
				390	unsigned Leftover = Range % uint32_t(-IncValue);
				391
				392	// If this is an equality comparison, we require that the strided value
				393	// exactly land on the exit value, otherwise the IV condition will wrap
				394	// around and do things the fp IV wouldn't.
				395	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
				396	Leftover != 0)
				397	return;
				398
				399	// If the stride would wrap around the i32 before exiting, we can't
				400	// transform the IV.
				401	if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
				402	return;
				403	}
				404
				405	IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
				406
				407	// Insert new integer induction variable.
				408	PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
				409	NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
				410	PN->getIncomingBlock(IncomingEdge));
				411
				412	Value *NewAdd =
				413	BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
				414	Incr->getName()+".int", Incr);
				415	NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
				416
				417	ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
				418	ConstantInt::get(Int32Ty, ExitValue),
				419	Compare->getName());
				420
				421	// In the following deletions, PN may become dead and may be deleted.
				422	// Use a WeakVH to observe whether this happens.
				423	WeakVH WeakPH = PN;
				424
				425	// Delete the old floating point exit comparison. The branch starts using the
				426	// new comparison.
				427	NewCompare->takeName(Compare);
				428	Compare->replaceAllUsesWith(NewCompare);
				429	RecursivelyDeleteTriviallyDeadInstructions(Compare);
				430
				431	// Delete the old floating point increment.
				432	Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
				433	RecursivelyDeleteTriviallyDeadInstructions(Incr);
				434
				435	// If the FP induction variable still has uses, this is because something else
				436	// in the loop uses its value. In order to canonicalize the induction
				437	// variable, we chose to eliminate the IV and rewrite it in terms of an
				438	// int->fp cast.
				439	//
				440	// We give preference to sitofp over uitofp because it is faster on most
				441	// platforms.
				442	if (WeakPH) {
				443	Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
				444	PN->getParent()->getFirstInsertionPt());
				445	PN->replaceAllUsesWith(Conv);
				446	RecursivelyDeleteTriviallyDeadInstructions(PN);
				447	}
				448
				449	// Add a new IVUsers entry for the newly-created integer PHI.
Tobias Grosser	d87492b	2012-03-23 08:02:15 +0000	[diff] [blame]	450	if (IU)
				451	IU->AddUsersIfInteresting(NewPHI);
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	452
				453	Changed = true;
				454	}
				455
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	456	void PollyIndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	457	// First step. Check to see if there are any floating-point recurrences.
				458	// If there are, change them into integer recurrences, permitting analysis by
				459	// the SCEV routines.
				460	//
				461	BasicBlock *Header = L->getHeader();
				462
				463	SmallVector<WeakVH, 8> PHIs;
				464	for (BasicBlock::iterator I = Header->begin();
				465	PHINode *PN = dyn_cast<PHINode>(I); ++I)
				466	PHIs.push_back(PN);
				467
				468	for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
				469	if (PHINode PN = dyn_cast_or_null<PHINode>(&PHIs[i]))
				470	HandleFloatingPointIV(L, PN);
				471
				472	// If the loop previously had floating-point IV, ScalarEvolution
				473	// may not have been able to compute a trip count. Now that we've done some
				474	// re-writing, the trip count may be computable.
				475	if (Changed)
				476	SE->forgetLoop(L);
				477	}
				478
				479	//===----------------------------------------------------------------------===//
				480	// RewriteLoopExitValues - Optimize IV users outside the loop.
				481	// As a side effect, reduces the amount of IV processing within the loop.
				482	//===----------------------------------------------------------------------===//
				483
				484	/// RewriteLoopExitValues - Check to see if this loop has a computable
				485	/// loop-invariant execution count. If so, this means that we can compute the
				486	/// final value of any expressions that are recurrent in the loop, and
				487	/// substitute the exit values from the loop into any instructions outside of
				488	/// the loop that use the final values of the current expressions.
				489	///
				490	/// This is mostly redundant with the regular IndVarSimplify activities that
				491	/// happen later, except that it's more powerful in some cases, because it's
				492	/// able to brute-force evaluate arbitrary instructions as long as they have
				493	/// constant operands at the beginning of the loop.
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	494	void PollyIndVarSimplify::RewriteLoopExitValues(Loop *L,
				495	SCEVExpander &Rewriter) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	496	// Verify the input to the pass in already in LCSSA form.
				497	assert(L->isLCSSAForm(*DT));
				498
				499	SmallVector<BasicBlock*, 8> ExitBlocks;
				500	L->getUniqueExitBlocks(ExitBlocks);
				501
				502	// Find all values that are computed inside the loop, but used outside of it.
				503	// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
				504	// the exit blocks of the loop to find them.
				505	for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
				506	BasicBlock *ExitBB = ExitBlocks[i];
				507
				508	// If there are no PHI nodes in this exit block, then no values defined
				509	// inside the loop are used on this path, skip it.
				510	PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
				511	if (!PN) continue;
				512
				513	unsigned NumPreds = PN->getNumIncomingValues();
				514
				515	// Iterate over all of the PHI nodes.
				516	BasicBlock::iterator BBI = ExitBB->begin();
				517	while ((PN = dyn_cast<PHINode>(BBI++))) {
				518	if (PN->use_empty())
				519	continue; // dead use, don't replace it
				520
				521	// SCEV only supports integer expressions for now.
				522	if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
				523	continue;
				524
				525	// It's necessary to tell ScalarEvolution about this explicitly so that
				526	// it can walk the def-use list and forget all SCEVs, as it may not be
				527	// watching the PHI itself. Once the new exit value is in place, there
				528	// may not be a def-use connection between the loop and every instruction
				529	// which got a SCEVAddRecExpr for that loop.
				530	SE->forgetValue(PN);
				531
				532	// Iterate over all of the values in all the PHI nodes.
				533	for (unsigned i = 0; i != NumPreds; ++i) {
				534	// If the value being merged in is not integer or is not defined
				535	// in the loop, skip it.
				536	Value *InVal = PN->getIncomingValue(i);
				537	if (!isa<Instruction>(InVal))
				538	continue;
				539
				540	// If this pred is for a subloop, not L itself, skip it.
				541	if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
				542	continue; // The Block is in a subloop, skip it.
				543
				544	// Check that InVal is defined in the loop.
				545	Instruction *Inst = cast<Instruction>(InVal);
				546	if (!L->contains(Inst))
				547	continue;
				548
				549	// Okay, this instruction has a user outside of the current loop
				550	// and varies predictably inside the loop. Evaluate the value it
				551	// contains when the loop exits, if possible.
				552	const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
				553	if (!SE->isLoopInvariant(ExitValue, L))
				554	continue;
				555
				556	Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
				557
				558	DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
				559	<< " LoopVal = " << *Inst << "\n");
				560
				561	if (!isValidRewrite(Inst, ExitVal)) {
				562	DeadInsts.push_back(ExitVal);
				563	continue;
				564	}
				565	Changed = true;
				566	++NumReplaced;
				567
				568	PN->setIncomingValue(i, ExitVal);
				569
				570	// If this instruction is dead now, delete it.
				571	RecursivelyDeleteTriviallyDeadInstructions(Inst);
				572
				573	if (NumPreds == 1) {
				574	// Completely replace a single-pred PHI. This is safe, because the
				575	// NewVal won't be variant in the loop, so we don't need an LCSSA phi
				576	// node anymore.
				577	PN->replaceAllUsesWith(ExitVal);
				578	RecursivelyDeleteTriviallyDeadInstructions(PN);
				579	}
				580	}
				581	if (NumPreds != 1) {
				582	// Clone the PHI and delete the original one. This lets IVUsers and
				583	// any other maps purge the original user from their records.
				584	PHINode *NewPN = cast<PHINode>(PN->clone());
				585	NewPN->takeName(PN);
				586	NewPN->insertBefore(PN);
				587	PN->replaceAllUsesWith(NewPN);
				588	PN->eraseFromParent();
				589	}
				590	}
				591	}
				592
				593	// The insertion point instruction may have been deleted; clear it out
				594	// so that the rewriter doesn't trip over it later.
				595	Rewriter.clearInsertPoint();
				596	}
				597
				598	//===----------------------------------------------------------------------===//
				599	// Rewrite IV users based on a canonical IV.
				600	// Only for use with -enable-iv-rewrite.
				601	//===----------------------------------------------------------------------===//
				602
				603	/// FIXME: It is an extremely bad idea to indvar substitute anything more
				604	/// complex than affine induction variables. Doing so will put expensive
				605	/// polynomial evaluations inside of the loop, and the str reduction pass
				606	/// currently can only reduce affine polynomials. For now just disable
				607	/// indvar subst on anything more complex than an affine addrec, unless
				608	/// it can be expanded to a trivial value.
				609	static bool isSafe(const SCEV S, const Loop L, ScalarEvolution *SE) {
				610	// Loop-invariant values are safe.
				611	if (SE->isLoopInvariant(S, L)) return true;
				612
				613	// Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
				614	// to transform them into efficient code.
				615	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
				616	return AR->isAffine();
				617
				618	// An add is safe it all its operands are safe.
				619	if (const SCEVCommutativeExpr *Commutative
				620	= dyn_cast<SCEVCommutativeExpr>(S)) {
				621	for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
				622	E = Commutative->op_end(); I != E; ++I)
				623	if (!isSafe(*I, L, SE)) return false;
				624	return true;
				625	}
				626
				627	// A cast is safe if its operand is.
				628	if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
				629	return isSafe(C->getOperand(), L, SE);
				630
				631	// A udiv is safe if its operands are.
				632	if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
				633	return isSafe(UD->getLHS(), L, SE) &&
				634	isSafe(UD->getRHS(), L, SE);
				635
				636	// SCEVUnknown is always safe.
				637	if (isa<SCEVUnknown>(S))
				638	return true;
				639
				640	// Nothing else is safe.
				641	return false;
				642	}
				643
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	644	void PollyIndVarSimplify::RewriteIVExpressions(Loop *L,
				645	SCEVExpander &Rewriter) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	646	// Rewrite all induction variable expressions in terms of the canonical
				647	// induction variable.
				648	//
				649	// If there were induction variables of other sizes or offsets, manually
				650	// add the offsets to the primary induction variable and cast, avoiding
				651	// the need for the code evaluation methods to insert induction variables
				652	// of different sizes.
				653	for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
				654	Value *Op = UI->getOperandValToReplace();
				655	Type *UseTy = Op->getType();
				656	Instruction *User = UI->getUser();
				657
				658	// Compute the final addrec to expand into code.
				659	const SCEV AR = IU->getReplacementExpr(UI);
				660
				661	// Evaluate the expression out of the loop, if possible.
				662	if (!L->contains(UI->getUser())) {
				663	const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
				664	if (SE->isLoopInvariant(ExitVal, L))
				665	AR = ExitVal;
				666	}
				667
				668	// FIXME: It is an extremely bad idea to indvar substitute anything more
				669	// complex than affine induction variables. Doing so will put expensive
				670	// polynomial evaluations inside of the loop, and the str reduction pass
				671	// currently can only reduce affine polynomials. For now just disable
				672	// indvar subst on anything more complex than an affine addrec, unless
				673	// it can be expanded to a trivial value.
				674	if (!isSafe(AR, L, SE))
				675	continue;
				676
				677	// Determine the insertion point for this user. By default, insert
				678	// immediately before the user. The SCEVExpander class will automatically
				679	// hoist loop invariants out of the loop. For PHI nodes, there may be
				680	// multiple uses, so compute the nearest common dominator for the
				681	// incoming blocks.
				682	Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
				683
				684	// Now expand it into actual Instructions and patch it into place.
				685	Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
				686
				687	DEBUG(dbgs() << "INDVARS: Rewrote IV '" << AR << "' " << Op << '\n'
				688	<< " into = " << *NewVal << "\n");
				689
				690	if (!isValidRewrite(Op, NewVal)) {
				691	DeadInsts.push_back(NewVal);
				692	continue;
				693	}
				694	// Inform ScalarEvolution that this value is changing. The change doesn't
				695	// affect its value, but it does potentially affect which use lists the
				696	// value will be on after the replacement, which affects ScalarEvolution's
				697	// ability to walk use lists and drop dangling pointers when a value is
				698	// deleted.
				699	SE->forgetValue(User);
				700
				701	// Patch the new value into place.
				702	if (Op->hasName())
				703	NewVal->takeName(Op);
				704	if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
				705	NewValI->setDebugLoc(User->getDebugLoc());
				706	User->replaceUsesOfWith(Op, NewVal);
				707	UI->setOperandValToReplace(NewVal);
				708
				709	++NumRemoved;
				710	Changed = true;
				711
				712	// The old value may be dead now.
				713	DeadInsts.push_back(Op);
				714	}
				715	}
				716
				717	//===----------------------------------------------------------------------===//
				718	// IV Widening - Extend the width of an IV to cover its widest uses.
				719	//===----------------------------------------------------------------------===//
				720
				721	namespace {
				722	// Collect information about induction variables that are used by sign/zero
				723	// extend operations. This information is recorded by CollectExtend and
				724	// provides the input to WidenIV.
				725	struct WideIVInfo {
				726	PHINode *NarrowIV;
				727	Type *WidestNativeType; // Widest integer type created [sz]ext
				728	bool IsSigned; // Was an sext user seen before a zext?
				729
				730	WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
				731	};
				732
				733	class WideIVVisitor : public IVVisitor {
				734	ScalarEvolution *SE;
				735	const TargetData *TD;
				736
				737	public:
				738	WideIVInfo WI;
				739
				740	WideIVVisitor(PHINode NarrowIV, ScalarEvolution SCEV,
				741	const TargetData *TData) :
				742	SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
				743
				744	// Implement the interface used by simplifyUsersOfIV.
				745	virtual void visitCast(CastInst *Cast);
				746	};
				747	}
				748
				749	/// visitCast - Update information about the induction variable that is
				750	/// extended by this sign or zero extend operation. This is used to determine
				751	/// the final width of the IV before actually widening it.
				752	void WideIVVisitor::visitCast(CastInst *Cast) {
				753	bool IsSigned = Cast->getOpcode() == Instruction::SExt;
				754	if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
				755	return;
				756
				757	Type *Ty = Cast->getType();
				758	uint64_t Width = SE->getTypeSizeInBits(Ty);
				759	if (TD && !TD->isLegalInteger(Width))
				760	return;
				761
				762	if (!WI.WidestNativeType) {
				763	WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
				764	WI.IsSigned = IsSigned;
				765	return;
				766	}
				767
				768	// We extend the IV to satisfy the sign of its first user, arbitrarily.
				769	if (WI.IsSigned != IsSigned)
				770	return;
				771
				772	if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
				773	WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
				774	}
				775
				776	namespace {
				777
				778	/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
				779	/// WideIV that computes the same value as the Narrow IV def. This avoids
				780	/// caching Use* pointers.
				781	struct NarrowIVDefUse {
				782	Instruction *NarrowDef;
				783	Instruction *NarrowUse;
				784	Instruction *WideDef;
				785
				786	NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
				787
				788	NarrowIVDefUse(Instruction ND, Instruction NU, Instruction *WD):
				789	NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
				790	};
				791
				792	/// WidenIV - The goal of this transform is to remove sign and zero extends
				793	/// without creating any new induction variables. To do this, it creates a new
				794	/// phi of the wider type and redirects all users, either removing extends or
				795	/// inserting truncs whenever we stop propagating the type.
				796	///
				797	class WidenIV {
				798	// Parameters
				799	PHINode *OrigPhi;
				800	Type *WideType;
				801	bool IsSigned;
				802
				803	// Context
				804	LoopInfo *LI;
				805	Loop *L;
				806	ScalarEvolution *SE;
				807	DominatorTree *DT;
				808
				809	// Result
				810	PHINode *WidePhi;
				811	Instruction *WideInc;
				812	const SCEV *WideIncExpr;
				813	SmallVectorImpl<WeakVH> &DeadInsts;
				814
				815	SmallPtrSet<Instruction*,16> Widened;
				816	SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
				817
				818	public:
				819	WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
				820	ScalarEvolution SEv, DominatorTree DTree,
				821	SmallVectorImpl<WeakVH> &DI) :
				822	OrigPhi(WI.NarrowIV),
				823	WideType(WI.WidestNativeType),
				824	IsSigned(WI.IsSigned),
				825	LI(LInfo),
				826	L(LI->getLoopFor(OrigPhi->getParent())),
				827	SE(SEv),
				828	DT(DTree),
				829	WidePhi(0),
				830	WideInc(0),
				831	WideIncExpr(0),
				832	DeadInsts(DI) {
				833	assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
				834	}
				835
				836	PHINode *CreateWideIV(SCEVExpander &Rewriter);
				837
				838	protected:
				839	Value getExtend(Value NarrowOper, Type *WideType, bool IsSigned,
				840	Instruction *Use);
				841
				842	Instruction *CloneIVUser(NarrowIVDefUse DU);
				843
				844	const SCEVAddRecExpr GetWideRecurrence(Instruction NarrowUse);
				845
				846	const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
				847
				848	Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
				849
				850	void pushNarrowIVUsers(Instruction NarrowDef, Instruction WideDef);
				851	};
				852	} // anonymous namespace
				853
				854	/// isLoopInvariant - Perform a quick domtree based check for loop invariance
				855	/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
				856	/// gratuitous for this purpose.
				857	static bool isLoopInvariant(Value V, const Loop L, const DominatorTree *DT) {
				858	Instruction *Inst = dyn_cast<Instruction>(V);
				859	if (!Inst)
				860	return true;
				861
				862	return DT->properlyDominates(Inst->getParent(), L->getHeader());
				863	}
				864
				865	Value WidenIV::getExtend(Value NarrowOper, Type *WideType, bool IsSigned,
				866	Instruction *Use) {
				867	// Set the debug location and conservative insertion point.
				868	IRBuilder<> Builder(Use);
				869	// Hoist the insertion point into loop preheaders as far as possible.
				870	for (const Loop *L = LI->getLoopFor(Use->getParent());
				871	L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
				872	L = L->getParentLoop())
				873	Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
				874
				875	return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
				876	Builder.CreateZExt(NarrowOper, WideType);
				877	}
				878
				879	/// CloneIVUser - Instantiate a wide operation to replace a narrow
				880	/// operation. This only needs to handle operations that can evaluation to
				881	/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
				882	Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
				883	unsigned Opcode = DU.NarrowUse->getOpcode();
				884	switch (Opcode) {
				885	default:
				886	return 0;
				887	case Instruction::Add:
				888	case Instruction::Mul:
				889	case Instruction::UDiv:
				890	case Instruction::Sub:
				891	case Instruction::And:
				892	case Instruction::Or:
				893	case Instruction::Xor:
				894	case Instruction::Shl:
				895	case Instruction::LShr:
				896	case Instruction::AShr:
				897	DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
				898
				899	// Replace NarrowDef operands with WideDef. Otherwise, we don't know
				900	// anything about the narrow operand yet so must insert a [sz]ext. It is
				901	// probably loop invariant and will be folded or hoisted. If it actually
				902	// comes from a widened IV, it should be removed during a future call to
				903	// WidenIVUse.
				904	Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
				905	getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
				906	Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
				907	getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
				908
				909	BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
				910	BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
				911	LHS, RHS,
				912	NarrowBO->getName());
				913	IRBuilder<> Builder(DU.NarrowUse);
				914	Builder.Insert(WideBO);
				915	if (const OverflowingBinaryOperator *OBO =
				916	dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
				917	if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
				918	if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
				919	}
				920	return WideBO;
				921	}
				922	llvm_unreachable(0);
				923	}
				924
				925	/// No-wrap operations can transfer sign extension of their result to their
				926	/// operands. Generate the SCEV value for the widened operation without
				927	/// actually modifying the IR yet. If the expression after extending the
				928	/// operands is an AddRec for this loop, return it.
				929	const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
				930	// Handle the common case of add<nsw/nuw>
				931	if (DU.NarrowUse->getOpcode() != Instruction::Add)
				932	return 0;
				933
				934	// One operand (NarrowDef) has already been extended to WideDef. Now determine
				935	// if extending the other will lead to a recurrence.
				936	unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
				937	assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
				938
				939	const SCEV *ExtendOperExpr = 0;
				940	const OverflowingBinaryOperator *OBO =
				941	cast<OverflowingBinaryOperator>(DU.NarrowUse);
				942	if (IsSigned && OBO->hasNoSignedWrap())
				943	ExtendOperExpr = SE->getSignExtendExpr(
				944	SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
				945	else if(!IsSigned && OBO->hasNoUnsignedWrap())
				946	ExtendOperExpr = SE->getZeroExtendExpr(
				947	SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
				948	else
				949	return 0;
				950
				951	// When creating this AddExpr, don't apply the current operations NSW or NUW
				952	// flags. This instruction may be guarded by control flow that the no-wrap
				953	// behavior depends on. Non-control-equivalent instructions can be mapped to
				954	// the same SCEV expression, and it would be incorrect to transfer NSW/NUW
				955	// semantics to those operations.
				956	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
				957	SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
				958
				959	if (!AddRec \|\| AddRec->getLoop() != L)
				960	return 0;
				961	return AddRec;
				962	}
				963
				964	/// GetWideRecurrence - Is this instruction potentially interesting from
				965	/// IVUsers' perspective after widening it's type? In other words, can the
				966	/// extend be safely hoisted out of the loop with SCEV reducing the value to a
				967	/// recurrence on the same loop. If so, return the sign or zero extended
				968	/// recurrence. Otherwise return NULL.
				969	const SCEVAddRecExpr WidenIV::GetWideRecurrence(Instruction NarrowUse) {
				970	if (!SE->isSCEVable(NarrowUse->getType()))
				971	return 0;
				972
				973	const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
				974	if (SE->getTypeSizeInBits(NarrowExpr->getType())
				975	>= SE->getTypeSizeInBits(WideType)) {
				976	// NarrowUse implicitly widens its operand. e.g. a gep with a narrow
				977	// index. So don't follow this use.
				978	return 0;
				979	}
				980
				981	const SCEV *WideExpr = IsSigned ?
				982	SE->getSignExtendExpr(NarrowExpr, WideType) :
				983	SE->getZeroExtendExpr(NarrowExpr, WideType);
				984	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
				985	if (!AddRec \|\| AddRec->getLoop() != L)
				986	return 0;
				987	return AddRec;
				988	}
				989
				990	/// WidenIVUse - Determine whether an individual user of the narrow IV can be
				991	/// widened. If so, return the wide clone of the user.
				992	Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
				993
				994	// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
				995	if (isa<PHINode>(DU.NarrowUse) &&
				996	LI->getLoopFor(DU.NarrowUse->getParent()) != L)
				997	return 0;
				998
				999	// Our raison d'etre! Eliminate sign and zero extension.
				1000	if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
				1001	Value *NewDef = DU.WideDef;
				1002	if (DU.NarrowUse->getType() != WideType) {
				1003	unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
				1004	unsigned IVWidth = SE->getTypeSizeInBits(WideType);
				1005	if (CastWidth < IVWidth) {
				1006	// The cast isn't as wide as the IV, so insert a Trunc.
				1007	IRBuilder<> Builder(DU.NarrowUse);
				1008	NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
				1009	}
				1010	else {
				1011	// A wider extend was hidden behind a narrower one. This may induce
				1012	// another round of IV widening in which the intermediate IV becomes
				1013	// dead. It should be very rare.
				1014	DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
				1015	<< " not wide enough to subsume " << *DU.NarrowUse << "\n");
				1016	DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
				1017	NewDef = DU.NarrowUse;
				1018	}
				1019	}
				1020	if (NewDef != DU.NarrowUse) {
				1021	DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
				1022	<< " replaced by " << *DU.WideDef << "\n");
				1023	++NumElimExt;
				1024	DU.NarrowUse->replaceAllUsesWith(NewDef);
				1025	DeadInsts.push_back(DU.NarrowUse);
				1026	}
				1027	// Now that the extend is gone, we want to expose it's uses for potential
				1028	// further simplification. We don't need to directly inform SimplifyIVUsers
				1029	// of the new users, because their parent IV will be processed later as a
				1030	// new loop phi. If we preserved IVUsers analysis, we would also want to
				1031	// push the uses of WideDef here.
				1032
				1033	// No further widening is needed. The deceased [sz]ext had done it for us.
				1034	return 0;
				1035	}
				1036
				1037	// Does this user itself evaluate to a recurrence after widening?
				1038	const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
				1039	if (!WideAddRec) {
				1040	WideAddRec = GetExtendedOperandRecurrence(DU);
				1041	}
				1042	if (!WideAddRec) {
				1043	// This user does not evaluate to a recurence after widening, so don't
				1044	// follow it. Instead insert a Trunc to kill off the original use,
				1045	// eventually isolating the original narrow IV so it can be removed.
				1046	IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
				1047	Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
				1048	DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
				1049	return 0;
				1050	}
				1051	// Assume block terminators cannot evaluate to a recurrence. We can't to
				1052	// insert a Trunc after a terminator if there happens to be a critical edge.
				1053	assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
				1054	"SCEV is not expected to evaluate a block terminator");
				1055
				1056	// Reuse the IV increment that SCEVExpander created as long as it dominates
				1057	// NarrowUse.
				1058	Instruction *WideUse = 0;
				1059	if (WideAddRec == WideIncExpr
				1060	&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
				1061	WideUse = WideInc;
				1062	else {
				1063	WideUse = CloneIVUser(DU);
				1064	if (!WideUse)
				1065	return 0;
				1066	}
				1067	// Evaluation of WideAddRec ensured that the narrow expression could be
				1068	// extended outside the loop without overflow. This suggests that the wide use
				1069	// evaluates to the same expression as the extended narrow use, but doesn't
				1070	// absolutely guarantee it. Hence the following failsafe check. In rare cases
				1071	// where it fails, we simply throw away the newly created wide use.
				1072	if (WideAddRec != SE->getSCEV(WideUse)) {
				1073	DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
				1074	<< ": " << SE->getSCEV(WideUse) << " != " << WideAddRec << "\n");
				1075	DeadInsts.push_back(WideUse);
				1076	return 0;
				1077	}
				1078
				1079	// Returning WideUse pushes it on the worklist.
				1080	return WideUse;
				1081	}
				1082
				1083	/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
				1084	///
				1085	void WidenIV::pushNarrowIVUsers(Instruction NarrowDef, Instruction WideDef) {
				1086	for (Value::use_iterator UI = NarrowDef->use_begin(),
				1087	UE = NarrowDef->use_end(); UI != UE; ++UI) {
				1088	Instruction NarrowUse = cast<Instruction>(UI);
				1089
				1090	// Handle data flow merges and bizarre phi cycles.
				1091	if (!Widened.insert(NarrowUse))
				1092	continue;
				1093
				1094	NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
				1095	}
				1096	}
				1097
				1098	/// CreateWideIV - Process a single induction variable. First use the
				1099	/// SCEVExpander to create a wide induction variable that evaluates to the same
				1100	/// recurrence as the original narrow IV. Then use a worklist to forward
				1101	/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
				1102	/// interesting IV users, the narrow IV will be isolated for removal by
				1103	/// DeleteDeadPHIs.
				1104	///
				1105	/// It would be simpler to delete uses as they are processed, but we must avoid
				1106	/// invalidating SCEV expressions.
				1107	///
				1108	PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
				1109	// Is this phi an induction variable?
				1110	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
				1111	if (!AddRec)
				1112	return NULL;
				1113
				1114	// Widen the induction variable expression.
				1115	const SCEV *WideIVExpr = IsSigned ?
				1116	SE->getSignExtendExpr(AddRec, WideType) :
				1117	SE->getZeroExtendExpr(AddRec, WideType);
				1118
				1119	assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
				1120	"Expect the new IV expression to preserve its type");
				1121
				1122	// Can the IV be extended outside the loop without overflow?
				1123	AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
				1124	if (!AddRec \|\| AddRec->getLoop() != L)
				1125	return NULL;
				1126
				1127	// An AddRec must have loop-invariant operands. Since this AddRec is
				1128	// materialized by a loop header phi, the expression cannot have any post-loop
				1129	// operands, so they must dominate the loop header.
				1130	assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
				1131	SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
				1132	&& "Loop header phi recurrence inputs do not dominate the loop");
				1133
				1134	// The rewriter provides a value for the desired IV expression. This may
				1135	// either find an existing phi or materialize a new one. Either way, we
				1136	// expect a well-formed cyclic phi-with-increments. i.e. any operand not part
				1137	// of the phi-SCC dominates the loop entry.
				1138	Instruction *InsertPt = L->getHeader()->begin();
				1139	WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
				1140
				1141	// Remembering the WideIV increment generated by SCEVExpander allows
				1142	// WidenIVUse to reuse it when widening the narrow IV's increment. We don't
				1143	// employ a general reuse mechanism because the call above is the only call to
				1144	// SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
				1145	if (BasicBlock *LatchBlock = L->getLoopLatch()) {
				1146	WideInc =
				1147	cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
				1148	WideIncExpr = SE->getSCEV(WideInc);
				1149	}
				1150
				1151	DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
				1152	++NumWidened;
				1153
				1154	// Traverse the def-use chain using a worklist starting at the original IV.
				1155	assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
				1156
				1157	Widened.insert(OrigPhi);
				1158	pushNarrowIVUsers(OrigPhi, WidePhi);
				1159
				1160	while (!NarrowIVUsers.empty()) {
				1161	NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
				1162
				1163	// Process a def-use edge. This may replace the use, so don't hold a
				1164	// use_iterator across it.
				1165	Instruction *WideUse = WidenIVUse(DU, Rewriter);
				1166
				1167	// Follow all def-use edges from the previous narrow use.
				1168	if (WideUse)
				1169	pushNarrowIVUsers(DU.NarrowUse, WideUse);
				1170
				1171	// WidenIVUse may have removed the def-use edge.
				1172	if (DU.NarrowDef->use_empty())
				1173	DeadInsts.push_back(DU.NarrowDef);
				1174	}
				1175	return WidePhi;
				1176	}
				1177
				1178	//===----------------------------------------------------------------------===//
				1179	// Simplification of IV users based on SCEV evaluation.
				1180	//===----------------------------------------------------------------------===//
				1181
				1182
				1183	/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
				1184	/// users. Each successive simplification may push more users which may
				1185	/// themselves be candidates for simplification.
				1186	///
				1187	/// Sign/Zero extend elimination is interleaved with IV simplification.
				1188	///
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	1189	void PollyIndVarSimplify::SimplifyAndExtend(Loop *L,
				1190	SCEVExpander &Rewriter,
				1191	LPPassManager &LPM) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	1192	SmallVector<WideIVInfo, 8> WideIVs;
				1193
				1194	SmallVector<PHINode*, 8> LoopPhis;
				1195	for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
				1196	LoopPhis.push_back(cast<PHINode>(I));
				1197	}
				1198	// Each round of simplification iterates through the SimplifyIVUsers worklist
				1199	// for all current phis, then determines whether any IVs can be
				1200	// widened. Widening adds new phis to LoopPhis, inducing another round of
				1201	// simplification on the wide IVs.
				1202	while (!LoopPhis.empty()) {
				1203	// Evaluate as many IV expressions as possible before widening any IVs. This
				1204	// forces SCEV to set no-wrap flags before evaluating sign/zero
				1205	// extension. The first time SCEV attempts to normalize sign/zero extension,
				1206	// the result becomes final. So for the most predictable results, we delay
				1207	// evaluation of sign/zero extend evaluation until needed, and avoid running
				1208	// other SCEV based analysis prior to SimplifyAndExtend.
				1209	do {
				1210	PHINode *CurrIV = LoopPhis.pop_back_val();
				1211
				1212	// Information about sign/zero extensions of CurrIV.
				1213	WideIVVisitor WIV(CurrIV, SE, TD);
				1214
				1215	Changed \|= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
				1216
				1217	if (WIV.WI.WidestNativeType) {
				1218	WideIVs.push_back(WIV.WI);
				1219	}
				1220	} while(!LoopPhis.empty());
				1221
				1222	for (; !WideIVs.empty(); WideIVs.pop_back()) {
				1223	WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
				1224	if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
				1225	Changed = true;
				1226	LoopPhis.push_back(WidePhi);
				1227	}
				1228	}
				1229	}
				1230	}
				1231
				1232	//===----------------------------------------------------------------------===//
				1233	// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
				1234	//===----------------------------------------------------------------------===//
				1235
				1236	/// Check for expressions that ScalarEvolution generates to compute
				1237	/// BackedgeTakenInfo. If these expressions have not been reduced, then
				1238	/// expanding them may incur additional cost (albeit in the loop preheader).
				1239	static bool isHighCostExpansion(const SCEV S, BranchInst BI,
				1240	SmallPtrSet<const SCEV*, 8> &Processed,
				1241	ScalarEvolution *SE) {
				1242	if (!Processed.insert(S))
				1243	return false;
				1244
				1245	// If the backedge-taken count is a UDiv, it's very likely a UDiv that
				1246	// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
				1247	// precise expression, rather than a UDiv from the user's code. If we can't
				1248	// find a UDiv in the code with some simple searching, assume the former and
				1249	// forego rewriting the loop.
				1250	if (isa<SCEVUDivExpr>(S)) {
				1251	ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
				1252	if (!OrigCond) return true;
				1253	const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
				1254	R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
				1255	if (R != S) {
				1256	const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
				1257	L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
				1258	if (L != S)
				1259	return true;
				1260	}
				1261	}
				1262
				1263	if (EnableIVRewrite)
				1264	return false;
				1265
				1266	// Recurse past add expressions, which commonly occur in the
				1267	// BackedgeTakenCount. They may already exist in program code, and if not,
				1268	// they are not too expensive rematerialize.
				1269	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
				1270	for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
				1271	I != E; ++I) {
				1272	if (isHighCostExpansion(*I, BI, Processed, SE))
				1273	return true;
				1274	}
				1275	return false;
				1276	}
				1277
				1278	// HowManyLessThans uses a Max expression whenever the loop is not guarded by
				1279	// the exit condition.
				1280	if (isa<SCEVSMaxExpr>(S) \|\| isa<SCEVUMaxExpr>(S))
				1281	return true;
				1282
				1283	// If we haven't recognized an expensive SCEV pattern, assume it's an
				1284	// expression produced by program code.
				1285	return false;
				1286	}
				1287
				1288	/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
				1289	/// count expression can be safely and cheaply expanded into an instruction
				1290	/// sequence that can be used by LinearFunctionTestReplace.
				1291	///
				1292	/// TODO: This fails for pointer-type loop counters with greater than one byte
				1293	/// strides, consequently preventing LFTR from running. For the purpose of LFTR
				1294	/// we could skip this check in the case that the LFTR loop counter (chosen by
				1295	/// FindLoopCounter) is also pointer type. Instead, we could directly convert
				1296	/// the loop test to an inequality test by checking the target data's alignment
				1297	/// of element types (given that the initial pointer value originates from or is
				1298	/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
				1299	/// However, we don't yet have a strong motivation for converting loop tests
				1300	/// into inequality tests.
				1301	static bool canExpandBackedgeTakenCount(Loop L, ScalarEvolution SE) {
				1302	const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
				1303	if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) \|\|
				1304	BackedgeTakenCount->isZero())
				1305	return false;
				1306
				1307	if (!L->getExitingBlock())
				1308	return false;
				1309
				1310	// Can't rewrite non-branch yet.
				1311	BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
				1312	if (!BI)
				1313	return false;
				1314
				1315	SmallPtrSet<const SCEV*, 8> Processed;
				1316	if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
				1317	return false;
				1318
				1319	return true;
				1320	}
				1321
				1322	/// getBackedgeIVType - Get the widest type used by the loop test after peeking
				1323	/// through Truncs.
				1324	///
				1325	/// TODO: Unnecessary when ForceLFTR is removed.
				1326	static Type getBackedgeIVType(Loop L) {
				1327	if (!L->getExitingBlock())
				1328	return 0;
				1329
				1330	// Can't rewrite non-branch yet.
				1331	BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
				1332	if (!BI)
				1333	return 0;
				1334
				1335	ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
				1336	if (!Cond)
				1337	return 0;
				1338
				1339	Type *Ty = 0;
				1340	for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
				1341	OI != OE; ++OI) {
				1342	assert((!Ty \|\| Ty == (*OI)->getType()) && "bad icmp operand types");
				1343	TruncInst Trunc = dyn_cast<TruncInst>(OI);
				1344	if (!Trunc)
				1345	continue;
				1346
				1347	return Trunc->getSrcTy();
				1348	}
				1349	return Ty;
				1350	}
				1351
				1352	/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
				1353	/// invariant value to the phi.
				1354	static PHINode getLoopPhiForCounter(Value IncV, Loop L, DominatorTree DT) {
				1355	Instruction *IncI = dyn_cast<Instruction>(IncV);
				1356	if (!IncI)
				1357	return 0;
				1358
				1359	switch (IncI->getOpcode()) {
				1360	case Instruction::Add:
				1361	case Instruction::Sub:
				1362	break;
				1363	case Instruction::GetElementPtr:
				1364	// An IV counter must preserve its type.
				1365	if (IncI->getNumOperands() == 2)
				1366	break;
				1367	default:
				1368	return 0;
				1369	}
				1370
				1371	PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
				1372	if (Phi && Phi->getParent() == L->getHeader()) {
				1373	if (isLoopInvariant(IncI->getOperand(1), L, DT))
				1374	return Phi;
				1375	return 0;
				1376	}
				1377	if (IncI->getOpcode() == Instruction::GetElementPtr)
				1378	return 0;
				1379
				1380	// Allow add/sub to be commuted.
				1381	Phi = dyn_cast<PHINode>(IncI->getOperand(1));
				1382	if (Phi && Phi->getParent() == L->getHeader()) {
				1383	if (isLoopInvariant(IncI->getOperand(0), L, DT))
				1384	return Phi;
				1385	}
				1386	return 0;
				1387	}
				1388
				1389	/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
				1390	/// that the current exit test is already sufficiently canonical.
				1391	static bool needsLFTR(Loop L, DominatorTree DT) {
				1392	assert(L->getExitingBlock() && "expected loop exit");
				1393
				1394	BasicBlock *LatchBlock = L->getLoopLatch();
				1395	// Don't bother with LFTR if the loop is not properly simplified.
				1396	if (!LatchBlock)
				1397	return false;
				1398
				1399	BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
				1400	assert(BI && "expected exit branch");
				1401
				1402	// Do LFTR to simplify the exit condition to an ICMP.
				1403	ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
				1404	if (!Cond)
				1405	return true;
				1406
				1407	// Do LFTR to simplify the exit ICMP to EQ/NE
				1408	ICmpInst::Predicate Pred = Cond->getPredicate();
				1409	if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
				1410	return true;
				1411
				1412	// Look for a loop invariant RHS
				1413	Value *LHS = Cond->getOperand(0);
				1414	Value *RHS = Cond->getOperand(1);
				1415	if (!isLoopInvariant(RHS, L, DT)) {
				1416	if (!isLoopInvariant(LHS, L, DT))
				1417	return true;
				1418	std::swap(LHS, RHS);
				1419	}
				1420	// Look for a simple IV counter LHS
				1421	PHINode *Phi = dyn_cast<PHINode>(LHS);
				1422	if (!Phi)
				1423	Phi = getLoopPhiForCounter(LHS, L, DT);
				1424
				1425	if (!Phi)
				1426	return true;
				1427
				1428	// Do LFTR if the exit condition's IV is not a simple counter.
				1429	Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
				1430	return Phi != getLoopPhiForCounter(IncV, L, DT);
				1431	}
				1432
				1433	/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
				1434	/// be rewritten) loop exit test.
				1435	static bool AlmostDeadIV(PHINode Phi, BasicBlock LatchBlock, Value *Cond) {
				1436	int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
				1437	Value *IncV = Phi->getIncomingValue(LatchIdx);
				1438
				1439	for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
				1440	UI != UE; ++UI) {
				1441	if (UI != Cond && UI != IncV) return false;
				1442	}
				1443
				1444	for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
				1445	UI != UE; ++UI) {
				1446	if (UI != Cond && UI != Phi) return false;
				1447	}
				1448	return true;
				1449	}
				1450
				1451	/// FindLoopCounter - Find an affine IV in canonical form.
				1452	///
				1453	/// BECount may be an i8* pointer type. The pointer difference is already
				1454	/// valid count without scaling the address stride, so it remains a pointer
				1455	/// expression as far as SCEV is concerned.
				1456	///
				1457	/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
				1458	///
				1459	/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
				1460	/// This is difficult in general for SCEV because of potential overflow. But we
				1461	/// could at least handle constant BECounts.
				1462	static PHINode *
				1463	FindLoopCounter(Loop L, const SCEV BECount,
				1464	ScalarEvolution SE, DominatorTree DT, const TargetData *TD) {
				1465	uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
				1466
				1467	Value *Cond =
				1468	cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
				1469
				1470	// Loop over all of the PHI nodes, looking for a simple counter.
				1471	PHINode *BestPhi = 0;
				1472	const SCEV *BestInit = 0;
				1473	BasicBlock *LatchBlock = L->getLoopLatch();
				1474	assert(LatchBlock && "needsLFTR should guarantee a loop latch");
				1475
				1476	for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
				1477	PHINode *Phi = cast<PHINode>(I);
				1478	if (!SE->isSCEVable(Phi->getType()))
				1479	continue;
				1480
				1481	// Avoid comparing an integer IV against a pointer Limit.
				1482	if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
				1483	continue;
				1484
				1485	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
				1486	if (!AR \|\| AR->getLoop() != L \|\| !AR->isAffine())
				1487	continue;
				1488
				1489	// AR may be a pointer type, while BECount is an integer type.
				1490	// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
				1491	// AR may not be a narrower type, or we may never exit.
				1492	uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
				1493	if (PhiWidth < BCWidth \|\| (TD && !TD->isLegalInteger(PhiWidth)))
				1494	continue;
				1495
				1496	const SCEV Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE));
				1497	if (!Step \|\| !Step->isOne())
				1498	continue;
				1499
				1500	int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
				1501	Value *IncV = Phi->getIncomingValue(LatchIdx);
				1502	if (getLoopPhiForCounter(IncV, L, DT) != Phi)
				1503	continue;
				1504
				1505	const SCEV *Init = AR->getStart();
				1506
				1507	if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
				1508	// Don't force a live loop counter if another IV can be used.
				1509	if (AlmostDeadIV(Phi, LatchBlock, Cond))
				1510	continue;
				1511
				1512	// Prefer to count-from-zero. This is a more "canonical" counter form. It
				1513	// also prefers integer to pointer IVs.
				1514	if (BestInit->isZero() != Init->isZero()) {
				1515	if (BestInit->isZero())
				1516	continue;
				1517	}
				1518	// If two IVs both count from zero or both count from nonzero then the
				1519	// narrower is likely a dead phi that has been widened. Use the wider phi
				1520	// to allow the other to be eliminated.
				1521	if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
				1522	continue;
				1523	}
				1524	BestPhi = Phi;
				1525	BestInit = Init;
				1526	}
				1527	return BestPhi;
				1528	}
				1529
				1530	/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
				1531	/// holds the RHS of the new loop test.
				1532	static Value genLoopLimit(PHINode IndVar, const SCEV IVCount, Loop L,
				1533	SCEVExpander &Rewriter, ScalarEvolution *SE) {
				1534	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
				1535	assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
				1536	const SCEV *IVInit = AR->getStart();
				1537
				1538	// IVInit may be a pointer while IVCount is an integer when FindLoopCounter
				1539	// finds a valid pointer IV. Sign extend BECount in order to materialize a
				1540	// GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
				1541	// the existing GEPs whenever possible.
				1542	if (IndVar->getType()->isPointerTy()
				1543	&& !IVCount->getType()->isPointerTy()) {
				1544
				1545	Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
				1546	const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
				1547
				1548	// Expand the code for the iteration count.
				1549	assert(SE->isLoopInvariant(IVOffset, L) &&
				1550	"Computed iteration count is not loop invariant!");
				1551	BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
				1552	Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
				1553
				1554	Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
				1555	assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
				1556	// We could handle pointer IVs other than i8*, but we need to compensate for
				1557	// gep index scaling. See canExpandBackedgeTakenCount comments.
				1558	assert(SE->getSizeOfExpr(
				1559	cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
				1560	&& "unit stride pointer IV must be i8*");
				1561
				1562	IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
				1563	return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
				1564	}
				1565	else {
				1566	// In any other case, convert both IVInit and IVCount to integers before
				1567	// comparing. This may result in SCEV expension of pointers, but in practice
				1568	// SCEV will fold the pointer arithmetic away as such:
				1569	// BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
				1570	//
				1571	// Valid Cases: (1) both integers is most common; (2) both may be pointers
				1572	// for simple memset-style loops; (3) IVInit is an integer and IVCount is a
				1573	// pointer may occur when enable-iv-rewrite generates a canonical IV on top
				1574	// of case #2.
				1575
				1576	const SCEV *IVLimit = 0;
				1577	// For unit stride, IVCount = Start + BECount with 2's complement overflow.
				1578	// For non-zero Start, compute IVCount here.
				1579	if (AR->getStart()->isZero())
				1580	IVLimit = IVCount;
				1581	else {
				1582	assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
				1583	const SCEV *IVInit = AR->getStart();
				1584
				1585	// For integer IVs, truncate the IV before computing IVInit + BECount.
				1586	if (SE->getTypeSizeInBits(IVInit->getType())
				1587	> SE->getTypeSizeInBits(IVCount->getType()))
				1588	IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
				1589
				1590	IVLimit = SE->getAddExpr(IVInit, IVCount);
				1591	}
				1592	// Expand the code for the iteration count.
				1593	BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
				1594	IRBuilder<> Builder(BI);
				1595	assert(SE->isLoopInvariant(IVLimit, L) &&
				1596	"Computed iteration count is not loop invariant!");
				1597	// Ensure that we generate the same type as IndVar, or a smaller integer
				1598	// type. In the presence of null pointer values, we have an integer type
				1599	// SCEV expression (IVInit) for a pointer type IV value (IndVar).
				1600	Type *LimitTy = IVCount->getType()->isPointerTy() ?
				1601	IndVar->getType() : IVCount->getType();
				1602	return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
				1603	}
				1604	}
				1605
				1606	/// LinearFunctionTestReplace - This method rewrites the exit condition of the
				1607	/// loop to be a canonical != comparison against the incremented loop induction
				1608	/// variable. This pass is able to rewrite the exit tests of any loop where the
				1609	/// SCEV analysis can determine a loop-invariant trip count of the loop, which
				1610	/// is actually a much broader range than just linear tests.
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	1611	Value *PollyIndVarSimplify::
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	1612	LinearFunctionTestReplace(Loop *L,
				1613	const SCEV *BackedgeTakenCount,
				1614	PHINode *IndVar,
				1615	SCEVExpander &Rewriter) {
				1616	assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
				1617
				1618	// LFTR can ignore IV overflow and truncate to the width of
				1619	// BECount. This avoids materializing the add(zext(add)) expression.
				1620	Type *CntTy = !EnableIVRewrite ?
				1621	BackedgeTakenCount->getType() : IndVar->getType();
				1622
				1623	const SCEV *IVCount = BackedgeTakenCount;
				1624
				1625	// If the exiting block is the same as the backedge block, we prefer to
				1626	// compare against the post-incremented value, otherwise we must compare
				1627	// against the preincremented value.
				1628	Value *CmpIndVar;
				1629	if (L->getExitingBlock() == L->getLoopLatch()) {
				1630	// Add one to the "backedge-taken" count to get the trip count.
				1631	// If this addition may overflow, we have to be more pessimistic and
				1632	// cast the induction variable before doing the add.
				1633	const SCEV *N =
				1634	SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
				1635	if (CntTy == IVCount->getType())
				1636	IVCount = N;
				1637	else {
				1638	const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
				1639	if ((isa<SCEVConstant>(N) && !N->isZero()) \|\|
				1640	SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
				1641	// No overflow. Cast the sum.
				1642	IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
				1643	} else {
				1644	// Potential overflow. Cast before doing the add.
				1645	IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
				1646	IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
				1647	}
				1648	}
				1649	// The BackedgeTaken expression contains the number of times that the
				1650	// backedge branches to the loop header. This is one less than the
				1651	// number of times the loop executes, so use the incremented indvar.
				1652	CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
				1653	} else {
				1654	// We must use the preincremented value...
				1655	IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
				1656	CmpIndVar = IndVar;
				1657	}
				1658
				1659	Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
				1660	assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
				1661	&& "genLoopLimit missed a cast");
				1662
				1663	// Insert a new icmp_ne or icmp_eq instruction before the branch.
				1664	BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
				1665	ICmpInst::Predicate P;
				1666	if (L->contains(BI->getSuccessor(0)))
				1667	P = ICmpInst::ICMP_NE;
				1668	else
				1669	P = ICmpInst::ICMP_EQ;
				1670
				1671	DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
				1672	<< " LHS:" << *CmpIndVar << '\n'
				1673	<< " op:\t"
				1674	<< (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
				1675	<< " RHS:\t" << *ExitCnt << "\n"
				1676	<< " IVCount:\t" << *IVCount << "\n");
				1677
				1678	IRBuilder<> Builder(BI);
				1679	if (SE->getTypeSizeInBits(CmpIndVar->getType())
				1680	> SE->getTypeSizeInBits(ExitCnt->getType())) {
				1681	CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
				1682	"lftr.wideiv");
				1683	}
				1684
				1685	Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
				1686	Value *OrigCond = BI->getCondition();
				1687	// It's tempting to use replaceAllUsesWith here to fully replace the old
				1688	// comparison, but that's not immediately safe, since users of the old
				1689	// comparison may not be dominated by the new comparison. Instead, just
				1690	// update the branch to use the new comparison; in the common case this
				1691	// will make old comparison dead.
				1692	BI->setCondition(Cond);
				1693	DeadInsts.push_back(OrigCond);
				1694
				1695	++NumLFTR;
				1696	Changed = true;
				1697	return Cond;
				1698	}
				1699
				1700	//===----------------------------------------------------------------------===//
				1701	// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
				1702	//===----------------------------------------------------------------------===//
				1703
				1704	/// If there's a single exit block, sink any loop-invariant values that
				1705	/// were defined in the preheader but not used inside the loop into the
				1706	/// exit block to reduce register pressure in the loop.
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	1707	void PollyIndVarSimplify::SinkUnusedInvariants(Loop *L) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	1708	BasicBlock *ExitBlock = L->getExitBlock();
				1709	if (!ExitBlock) return;
				1710
				1711	BasicBlock *Preheader = L->getLoopPreheader();
				1712	if (!Preheader) return;
				1713
				1714	Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
				1715	BasicBlock::iterator I = Preheader->getTerminator();
				1716	while (I != Preheader->begin()) {
				1717	--I;
				1718	// New instructions were inserted at the end of the preheader.
				1719	if (isa<PHINode>(I))
				1720	break;
				1721
				1722	// Don't move instructions which might have side effects, since the side
				1723	// effects need to complete before instructions inside the loop. Also don't
				1724	// move instructions which might read memory, since the loop may modify
				1725	// memory. Note that it's okay if the instruction might have undefined
				1726	// behavior: LoopSimplify guarantees that the preheader dominates the exit
				1727	// block.
				1728	if (I->mayHaveSideEffects() \|\| I->mayReadFromMemory())
				1729	continue;
				1730
				1731	// Skip debug info intrinsics.
				1732	if (isa<DbgInfoIntrinsic>(I))
				1733	continue;
				1734
				1735	// Skip landingpad instructions.
				1736	if (isa<LandingPadInst>(I))
				1737	continue;
				1738
				1739	// Don't sink alloca: we never want to sink static alloca's out of the
				1740	// entry block, and correctly sinking dynamic alloca's requires
				1741	// checks for stacksave/stackrestore intrinsics.
				1742	// FIXME: Refactor this check somehow?
				1743	if (isa<AllocaInst>(I))
				1744	continue;
				1745
				1746	// Determine if there is a use in or before the loop (direct or
				1747	// otherwise).
				1748	bool UsedInLoop = false;
				1749	for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
				1750	UI != UE; ++UI) {
				1751	User U = UI;
				1752	BasicBlock *UseBB = cast<Instruction>(U)->getParent();
				1753	if (PHINode *P = dyn_cast<PHINode>(U)) {
				1754	unsigned i =
				1755	PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
				1756	UseBB = P->getIncomingBlock(i);
				1757	}
				1758	if (UseBB == Preheader \|\| L->contains(UseBB)) {
				1759	UsedInLoop = true;
				1760	break;
				1761	}
				1762	}
				1763
				1764	// If there is, the def must remain in the preheader.
				1765	if (UsedInLoop)
				1766	continue;
				1767
				1768	// Otherwise, sink it to the exit block.
				1769	Instruction *ToMove = I;
				1770	bool Done = false;
				1771
				1772	if (I != Preheader->begin()) {
				1773	// Skip debug info intrinsics.
				1774	do {
				1775	--I;
				1776	} while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
				1777
				1778	if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
				1779	Done = true;
				1780	} else {
				1781	Done = true;
				1782	}
				1783
				1784	ToMove->moveBefore(InsertPt);
				1785	if (Done) break;
				1786	InsertPt = ToMove;
				1787	}
				1788	}
				1789
				1790	//===----------------------------------------------------------------------===//
				1791	// IndVarSimplify driver. Manage several subpasses of IV simplification.
				1792	//===----------------------------------------------------------------------===//
				1793
Tobias Grosser	d1f12db	2012-03-23 08:02:05 +0000	[diff] [blame]	1794	bool PollyIndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	1795	// If LoopSimplify form is not available, stay out of trouble. Some notes:
				1796	// - LSR currently only supports LoopSimplify-form loops. Indvars'
				1797	// canonicalization can be a pessimization without LSR to "clean up"
				1798	// afterwards.
				1799	// - We depend on having a preheader; in particular,
				1800	// Loop::getCanonicalInductionVariable only supports loops with preheaders,
				1801	// and we're in trouble if we can't find the induction variable even when
				1802	// we've manually inserted one.
				1803	if (!L->isLoopSimplifyForm())
				1804	return false;
				1805
				1806	if (EnableIVRewrite)
				1807	IU = &getAnalysis<IVUsers>();
				1808	LI = &getAnalysis<LoopInfo>();
				1809	SE = &getAnalysis<ScalarEvolution>();
				1810	DT = &getAnalysis<DominatorTree>();
				1811	TD = getAnalysisIfAvailable<TargetData>();
				1812
				1813	DeadInsts.clear();
				1814	Changed = false;
				1815
				1816	// If there are any floating-point recurrences, attempt to
				1817	// transform them to use integer recurrences.
				1818	RewriteNonIntegerIVs(L);
				1819
				1820	const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
				1821
				1822	// Create a rewriter object which we'll use to transform the code with.
				1823	SCEVExpander Rewriter(*SE, "indvars");
				1824	#ifndef NDEBUG
				1825	Rewriter.setDebugType(DEBUG_TYPE);
				1826	#endif
				1827
				1828	// Eliminate redundant IV users.
				1829	//
				1830	// Simplification works best when run before other consumers of SCEV. We
				1831	// attempt to avoid evaluating SCEVs for sign/zero extend operations until
				1832	// other expressions involving loop IVs have been evaluated. This helps SCEV
				1833	// set no-wrap flags before normalizing sign/zero extension.
				1834	if (!EnableIVRewrite) {
				1835	Rewriter.disableCanonicalMode();
				1836	SimplifyAndExtend(L, Rewriter, LPM);
				1837	}
				1838
				1839	// Check to see if this loop has a computable loop-invariant execution count.
				1840	// If so, this means that we can compute the final value of any expressions
				1841	// that are recurrent in the loop, and substitute the exit values from the
				1842	// loop into any instructions outside of the loop that use the final values of
				1843	// the current expressions.
				1844	//
				1845	if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
				1846	RewriteLoopExitValues(L, Rewriter);
				1847
				1848	// Eliminate redundant IV users.
Tobias Grosser	3e72197	2012-03-23 08:02:19 +0000	[diff] [blame]	1849	// FIXME: Disabled as the function was removed from LLVM trunk. We may get
				1850	// along with this, as Polly does not need a lot of simplifications,
				1851	// but just a canonical induction variable. In the near future, we
				1852	// should remove the need of canonical induction variables all
				1853	// together.
				1854	//if (EnableIVRewrite)
				1855	// Changed \|= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	1856
				1857	// Eliminate redundant IV cycles.
				1858	if (!EnableIVRewrite)
				1859	NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
				1860
				1861	// Compute the type of the largest recurrence expression, and decide whether
				1862	// a canonical induction variable should be inserted.
				1863	Type *LargestType = 0;
				1864	bool NeedCannIV = false;
				1865	bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
				1866	if (EnableIVRewrite && ExpandBECount) {
				1867	// If we have a known trip count and a single exit block, we'll be
				1868	// rewriting the loop exit test condition below, which requires a
				1869	// canonical induction variable.
				1870	NeedCannIV = true;
				1871	Type *Ty = BackedgeTakenCount->getType();
				1872	if (!EnableIVRewrite) {
				1873	// In this mode, SimplifyIVUsers may have already widened the IV used by
				1874	// the backedge test and inserted a Trunc on the compare's operand. Get
				1875	// the wider type to avoid creating a redundant narrow IV only used by the
				1876	// loop test.
				1877	LargestType = getBackedgeIVType(L);
				1878	}
				1879	if (!LargestType \|\|
				1880	SE->getTypeSizeInBits(Ty) >
				1881	SE->getTypeSizeInBits(LargestType))
				1882	LargestType = SE->getEffectiveSCEVType(Ty);
				1883	}
				1884	if (EnableIVRewrite) {
				1885	for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
				1886	NeedCannIV = true;
				1887	Type *Ty =
				1888	SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
				1889	if (!LargestType \|\|
				1890	SE->getTypeSizeInBits(Ty) >
				1891	SE->getTypeSizeInBits(LargestType))
				1892	LargestType = Ty;
				1893	}
				1894	}
				1895
				1896	// Now that we know the largest of the induction variable expressions
				1897	// in this loop, insert a canonical induction variable of the largest size.
				1898	PHINode *IndVar = 0;
				1899	if (NeedCannIV) {
				1900	// Check to see if the loop already has any canonical-looking induction
				1901	// variables. If any are present and wider than the planned canonical
				1902	// induction variable, temporarily remove them, so that the Rewriter
				1903	// doesn't attempt to reuse them.
				1904	SmallVector<PHINode *, 2> OldCannIVs;
				1905	while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
				1906	if (SE->getTypeSizeInBits(OldCannIV->getType()) >
				1907	SE->getTypeSizeInBits(LargestType))
				1908	OldCannIV->removeFromParent();
				1909	else
				1910	break;
				1911	OldCannIVs.push_back(OldCannIV);
				1912	}
				1913
				1914	IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
				1915
				1916	++NumInserted;
				1917	Changed = true;
				1918	DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
				1919
				1920	// Now that the official induction variable is established, reinsert
				1921	// any old canonical-looking variables after it so that the IR remains
				1922	// consistent. They will be deleted as part of the dead-PHI deletion at
				1923	// the end of the pass.
				1924	while (!OldCannIVs.empty()) {
				1925	PHINode *OldCannIV = OldCannIVs.pop_back_val();
				1926	OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
				1927	}
				1928	}
				1929	else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
				1930	IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
				1931	}
				1932	// If we have a trip count expression, rewrite the loop's exit condition
				1933	// using it. We can currently only handle loops with a single exit.
				1934	Value *NewICmp = 0;
				1935	if (ExpandBECount && IndVar) {
				1936	// Check preconditions for proper SCEVExpander operation. SCEV does not
				1937	// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
				1938	// pass that uses the SCEVExpander must do it. This does not work well for
				1939	// loop passes because SCEVExpander makes assumptions about all loops, while
				1940	// LoopPassManager only forces the current loop to be simplified.
				1941	//
				1942	// FIXME: SCEV expansion has no way to bail out, so the caller must
				1943	// explicitly check any assumptions made by SCEV. Brittle.
				1944	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
				1945	if (!AR \|\| AR->getLoop()->getLoopPreheader())
				1946	NewICmp =
				1947	LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
				1948	}
				1949	// Rewrite IV-derived expressions.
				1950	if (EnableIVRewrite)
				1951	RewriteIVExpressions(L, Rewriter);
				1952
				1953	// Clear the rewriter cache, because values that are in the rewriter's cache
				1954	// can be deleted in the loop below, causing the AssertingVH in the cache to
				1955	// trigger.
				1956	Rewriter.clear();
				1957
				1958	// Now that we're done iterating through lists, clean up any instructions
				1959	// which are now dead.
				1960	while (!DeadInsts.empty())
				1961	if (Instruction *Inst =
				1962	dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
				1963	RecursivelyDeleteTriviallyDeadInstructions(Inst);
				1964
				1965	// The Rewriter may not be used from this point on.
				1966
				1967	// Loop-invariant instructions in the preheader that aren't used in the
				1968	// loop may be sunk below the loop to reduce register pressure.
				1969	SinkUnusedInvariants(L);
				1970
				1971	// For completeness, inform IVUsers of the IV use in the newly-created
				1972	// loop exit test instruction.
				1973	if (IU && NewICmp) {
				1974	ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
Tobias Grosser	d87492b	2012-03-23 08:02:15 +0000	[diff] [blame]	1975	if (NewICmpInst)
				1976	IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
Tobias Grosser	cef36d5	2012-02-14 14:02:33 +0000	[diff] [blame]	1977	}
				1978	// Clean up dead instructions.
				1979	Changed \|= DeleteDeadPHIs(L->getHeader());
				1980	// Check a post-condition.
				1981	assert(L->isLCSSAForm(*DT) &&
				1982	"Indvars did not leave the loop in lcssa form!");
				1983
				1984	// Verify that LFTR, and any other change have not interfered with SCEV's
				1985	// ability to compute trip count.
				1986	#ifndef NDEBUG
				1987	if (!EnableIVRewrite && VerifyIndvars &&
				1988	!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
				1989	SE->forgetLoop(L);
				1990	const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
				1991	if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
				1992	SE->getTypeSizeInBits(NewBECount->getType()))
				1993	NewBECount = SE->getTruncateOrNoop(NewBECount,
				1994	BackedgeTakenCount->getType());
				1995	else
				1996	BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
				1997	NewBECount->getType());
				1998	assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
				1999	}
				2000	#endif
				2001
				2002	return Changed;
				2003	}