Add the last part that is needed for vectorization of if-converted code.
Added the code that actually performs the if-conversion during vectorization.
We can now vectorize this code:
for (int i=0; i<n; ++i) {
unsigned k = 0;
if (a[i] > b[i]) <------ IF inside the loop.
k = k * 5 + 3;
a[i] = k; <---- K is a phi node that becomes vector-select.
}
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169217 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0e33228..f538e08 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -50,6 +50,7 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -134,6 +135,9 @@
}
private:
+ /// A small list of PHINodes.
+ typedef SmallVector<PHINode*, 4> PhiVector;
+
/// Add code that checks at runtime if the accessed arrays overlap.
/// Returns the comperator value or NULL if no check is needed.
Value *addRuntimeCheck(LoopVectorizationLegality *Legal,
@@ -142,6 +146,19 @@
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+ /// A helper function that computes the predicate of the block BB, assuming
+ /// that the header block of the loop is set to True. It returns the *entry*
+ /// mask for the block BB.
+ Value *createBlockInMask(BasicBlock *BB);
+ /// A helper function that computes the predicate of the edge between SRC
+ /// and DST.
+ Value *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
+
+ /// A helper function to vectorize a single BB within the innermost loop.
+ void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
+ PhiVector *PV);
+
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
@@ -816,7 +833,7 @@
DL->getIntPtrType(SE->getContext());
// Find the loop boundaries.
- const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
// Get the total trip count from the count by adding 1.
@@ -838,7 +855,6 @@
OldInduction->getIncomingValueForBlock(BypassBlock):
ConstantInt::get(IdxTy, 0);
- assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
assert(BypassBlock && "Invalid loop structure");
// Generate the code that checks in runtime if arrays overlap.
@@ -1044,7 +1060,6 @@
// the cost-model.
//
//===------------------------------------------------===//
- typedef SmallVector<PHINode*, 4> PhiVector;
BasicBlock &BB = *OrigLoop->getHeader();
Constant *Zero = ConstantInt::get(
IntegerType::getInt32Ty(BB.getContext()), 0);
@@ -1059,250 +1074,17 @@
// construct the PHI.
PhiVector RdxPHIsToFix;
- // For each instruction in the old loop.
- for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
- Instruction *Inst = it;
+ // Scan the loop in a topological order to ensure that defs are vectorized
+ // before users.
+ LoopBlocksDFS DFS(OrigLoop);
+ DFS.perform(LI);
- switch (Inst->getOpcode()) {
- case Instruction::Br:
- // Nothing to do for PHIs and BR, since we already took care of the
- // loop control flow instructions.
- continue;
- case Instruction::PHI:{
- PHINode* P = cast<PHINode>(Inst);
- // Handle reduction variables:
- if (Legal->getReductionVars()->count(P)) {
- // This is phase one of vectorizing PHIs.
- Type *VecTy = VectorType::get(Inst->getType(), VF);
- WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody->getFirstInsertionPt());
- RdxPHIsToFix.push_back(P);
- continue;
- }
+ // Vectorize all of the blocks in the original loop.
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+ be = DFS.endRPO(); bb != be; ++bb)
+ vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
- // This PHINode must be an induction variable.
- // Make sure that we know about it.
- assert(Legal->getInductionVars()->count(P) &&
- "Not an induction variable");
-
- if (P->getType()->isIntegerTy()) {
- assert(P == OldInduction && "Unexpected PHI");
- Value *Broadcasted = getBroadcastInstrs(Induction);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding 0, 1, 2 ...
- Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted);
-
- WidenMap[OldInduction] = ConsecutiveInduction;
- continue;
- }
-
- // Handle pointer inductions.
- assert(P->getType()->isPointerTy() && "Unexpected type.");
- Value *StartIdx = OldInduction ?
- Legal->getInductionVars()->lookup(OldInduction) :
- ConstantInt::get(Induction->getType(), 0);
-
- // This is the pointer value coming into the loop.
- Value *StartPtr = Legal->getInductionVars()->lookup(P);
-
- // This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
- "normalized.idx");
-
- // This is the vector of results. Notice that we don't generate vector
- // geps because scalar geps result in better code.
- Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
- for (unsigned int i = 0; i < VF; ++i) {
- Constant *Idx = ConstantInt::get(Induction->getType(), i);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
- Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
- VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
- Builder.getInt32(i),
- "insert.gep");
- }
-
- WidenMap[Inst] = VecVal;
- continue;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // Just widen binops.
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Value *B = getVectorValue(Inst->getOperand(1));
-
- // Use this vector value for all users of the original instruction.
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
- WidenMap[Inst] = V;
-
- // Update the NSW, NUW and Exact flags.
- BinaryOperator *VecOp = cast<BinaryOperator>(V);
- if (isa<OverflowingBinaryOperator>(BinOp)) {
- VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
- VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
- }
- if (isa<PossiblyExactOperator>(VecOp))
- VecOp->setIsExact(BinOp->isExact());
- break;
- }
- case Instruction::Select: {
- // Widen selects.
- // If the selector is loop invariant we can create a select
- // instruction with a scalar condition. Otherwise, use vector-select.
- Value *Cond = Inst->getOperand(0);
- bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- Cond = getVectorValue(Cond);
- if (InvariantCond)
- Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
-
- Value *Op0 = getVectorValue(Inst->getOperand(1));
- Value *Op1 = getVectorValue(Inst->getOperand(2));
- WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1);
- break;
- }
-
- case Instruction::ICmp:
- case Instruction::FCmp: {
- // Widen compares. Generate vector compares.
- bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
- CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Value *B = getVectorValue(Inst->getOperand(1));
- if (FCmp)
- WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
- else
- WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
- break;
- }
-
- case Instruction::Store: {
- // Attempt to issue a wide store.
- StoreInst *SI = dyn_cast<StoreInst>(Inst);
- Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
- Value *Ptr = SI->getPointerOperand();
- unsigned Alignment = SI->getAlignment();
-
- assert(!Legal->isUniform(Ptr) &&
- "We do not allow storing to uniform addresses");
-
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
- // This store does not use GEPs.
- if (!Legal->isConsecutivePtr(Ptr)) {
- scalarizeInstruction(Inst);
- break;
- }
-
- if (Gep) {
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
- } else {
- // Use the induction element ptr.
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
- Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
- }
- Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
- Value *Val = getVectorValue(SI->getValueOperand());
- Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
- break;
- }
- case Instruction::Load: {
- // Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(Inst);
- Type *RetTy = VectorType::get(LI->getType(), VF);
- Value *Ptr = LI->getPointerOperand();
- unsigned Alignment = LI->getAlignment();
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
- // If the pointer is loop invariant or if it is non consecutive,
- // scalarize the load.
- bool Con = Legal->isConsecutivePtr(Ptr);
- if (Legal->isUniform(Ptr) || !Con) {
- scalarizeInstruction(Inst);
- break;
- }
-
- if (Gep) {
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
- } else {
- // Use the induction element ptr.
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
- Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
- }
-
- Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
- LI = Builder.CreateLoad(Ptr);
- LI->setAlignment(Alignment);
- // Use this vector value for all users of the load.
- WidenMap[Inst] = LI;
- break;
- }
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- /// Vectorize bitcasts.
- CastInst *CI = dyn_cast<CastInst>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
- WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- break;
- }
-
- default:
- /// All other instructions are unsupported. Scalarize them.
- scalarizeInstruction(Inst);
- break;
- }// end of switch.
- }// end of for_each instr.
-
- // At this point every instruction in the original loop is widended to
+ // At this point every instruction in the original loop is widened to
// a vector form. We are almost done. Now, we need to fix the PHI nodes
// that we vectorized. The PHI nodes are currently empty because we did
// not want to introduce cycles. Notice that the remaining PHI nodes
@@ -1426,6 +1208,313 @@
}// end of for each redux variable.
}
+Value *InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+ assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
+ "Invalid edge");
+
+ Value *SrcMask = createBlockInMask(Src);
+
+ // The terminator has to be a branch inst!
+ BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
+ assert(BI && "Unexpected terminator found");
+
+ Value *EdgeMask = SrcMask;
+ if (BI->isConditional()) {
+ EdgeMask = getVectorValue(BI->getCondition());
+ if (BI->getSuccessor(0) != Dst)
+ EdgeMask = Builder.CreateNot(EdgeMask);
+ }
+
+ return Builder.CreateAnd(EdgeMask, SrcMask);
+}
+
+Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+ assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+
+ // Loop incoming mask is all-one.
+ if (OrigLoop->getHeader() == BB)
+ return getVectorValue(
+ ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1));
+
+ // This is the block mask. We OR all incoming edges, and with zero.
+ Value *BlockMask = getVectorValue(
+ ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0));
+
+ // For each pred:
+ for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it)
+ BlockMask = Builder.CreateOr(BlockMask, createEdgeMask(*it, BB));
+
+ return BlockMask;
+}
+
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+ BasicBlock *BB, PhiVector *PV) {
+ Constant *Zero =
+ ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0);
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ switch (it->getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+ case Instruction::PHI:{
+ PHINode* P = cast<PHINode>(it);
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = VectorType::get(it->getType(), VF);
+ WidenMap[it] =
+ PHINode::Create(VecTy, 2, "vec.phi",
+ LoopVectorBody->getFirstInsertionPt());
+ PV->push_back(P);
+ continue;
+ }
+
+ // Check for PHI nodes that are lowered to vector selects.
+ if (P->getParent() != OrigLoop->getHeader()) {
+ // We know that all PHIs in non header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+ Value *Cond = createBlockInMask(P->getIncomingBlock(0));
+ WidenMap[P] =
+ Builder.CreateSelect(Cond,
+ getVectorValue(P->getIncomingValue(0)),
+ getVectorValue(P->getIncomingValue(1)),
+ "predphi");
+ continue;
+ }
+
+ // This PHINode must be an induction variable.
+ // Make sure that we know about it.
+ assert(Legal->getInductionVars()->count(P) &&
+ "Not an induction variable");
+
+ if (P->getType()->isIntegerTy()) {
+ assert(P == OldInduction && "Unexpected PHI");
+ Value *Broadcasted = getBroadcastInstrs(Induction);
+ // After broadcasting the induction variable we need to make the
+ // vector consecutive by adding 0, 1, 2 ...
+ Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted);
+
+ WidenMap[OldInduction] = ConsecutiveInduction;
+ continue;
+ }
+
+ // Handle pointer inductions.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+ Value *StartIdx = OldInduction ?
+ Legal->getInductionVars()->lookup(OldInduction) :
+ ConstantInt::get(Induction->getType(), 0);
+
+ // This is the pointer value coming into the loop.
+ Value *StartPtr = Legal->getInductionVars()->lookup(P);
+
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+
+ // This is the vector of results. Notice that we don't generate vector
+ // geps because scalar geps result in better code.
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ Constant *Idx = ConstantInt::get(Induction->getType(), i);
+ Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
+ }
+
+ WidenMap[it] = VecVal;
+ continue;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen binops.
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+ Value *A = getVectorValue(it->getOperand(0));
+ Value *B = getVectorValue(it->getOperand(1));
+
+ // Use this vector value for all users of the original instruction.
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+ WidenMap[it] = V;
+
+ // Update the NSW, NUW and Exact flags.
+ BinaryOperator *VecOp = cast<BinaryOperator>(V);
+ if (isa<OverflowingBinaryOperator>(BinOp)) {
+ VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
+ VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(VecOp))
+ VecOp->setIsExact(BinOp->isExact());
+ break;
+ }
+ case Instruction::Select: {
+ // Widen selects.
+ // If the selector is loop invariant we can create a select
+ // instruction with a scalar condition. Otherwise, use vector-select.
+ Value *Cond = it->getOperand(0);
+ bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ Cond = getVectorValue(Cond);
+ if (InvariantCond)
+ Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
+
+ Value *Op0 = getVectorValue(it->getOperand(1));
+ Value *Op1 = getVectorValue(it->getOperand(2));
+ WidenMap[it] = Builder.CreateSelect(Cond, Op0, Op1);
+ break;
+ }
+
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (it->getOpcode() == Instruction::FCmp);
+ CmpInst *Cmp = dyn_cast<CmpInst>(it);
+ Value *A = getVectorValue(it->getOperand(0));
+ Value *B = getVectorValue(it->getOperand(1));
+ if (FCmp)
+ WidenMap[it] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ else
+ WidenMap[it] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ break;
+ }
+
+ case Instruction::Store: {
+ // Attempt to issue a wide store.
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+ Value *Ptr = SI->getPointerOperand();
+ unsigned Alignment = SI->getAlignment();
+
+ assert(!Legal->isUniform(Ptr) &&
+ "We do not allow storing to uniform addresses");
+
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+ // This store does not use GEPs.
+ if (!Legal->isConsecutivePtr(Ptr)) {
+ scalarizeInstruction(it);
+ break;
+ }
+
+ if (Gep) {
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example A[I+1];
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ } else {
+ // Use the induction element ptr.
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+ }
+ Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
+ Value *Val = getVectorValue(SI->getValueOperand());
+ Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
+ break;
+ }
+ case Instruction::Load: {
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(it);
+ Type *RetTy = VectorType::get(LI->getType(), VF);
+ Value *Ptr = LI->getPointerOperand();
+ unsigned Alignment = LI->getAlignment();
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+ // If the pointer is loop invariant or if it is non consecutive,
+ // scalarize the load.
+ bool Con = Legal->isConsecutivePtr(Ptr);
+ if (Legal->isUniform(Ptr) || !Con) {
+ scalarizeInstruction(it);
+ break;
+ }
+
+ if (Gep) {
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example A[I+1];
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ } else {
+ // Use the induction element ptr.
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+ }
+
+ Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
+ LI = Builder.CreateLoad(Ptr);
+ LI->setAlignment(Alignment);
+ // Use this vector value for all users of the load.
+ WidenMap[it] = LI;
+ break;
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ /// Vectorize bitcasts.
+ CastInst *CI = dyn_cast<CastInst>(it);
+ Value *A = getVectorValue(it->getOperand(0));
+ Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+ WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ break;
+ }
+
+ default:
+ /// All other instructions are unsupported. Scalarize them.
+ scalarizeInstruction(it);
+ break;
+ }// end of switch.
+ }// end of for_each instr.
+}
+
+
void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
SE->forgetLoop(OrigLoop);
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
new file mode 100644
index 0000000..35c549c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is the loop in this example:
+;
+;int function0(int *a, int *b, int start, int end) {
+;
+; for (int i=start; i<end; ++i) {
+; unsigned k = a[i];
+;
+; if (a[i] > b[i]) <------ notice the IF inside the loop.
+; k = k * 5 + 3;
+;
+; a[i] = k; <---- K is a phi node that becomes vector-select.
+; }
+;}
+
+;CHECK: @function0
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+ %cmp16 = icmp slt i32 %start, %end
+ br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %0 = sext i32 %start to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx4, align 4
+ %cmp5 = icmp sgt i32 %1, %2
+ br i1 %cmp5, label %if.then, label %if.end
+
+if.then:
+ %mul = mul i32 %1, 5
+ %add = add i32 %mul, 3
+ br label %if.end
+
+if.end:
+ %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
+ store i32 %k.0, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %3 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %3, %end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret i32 undef
+}