//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
Nadav Rotem8543ba32013-04-12 21:16:54 +00009#define DEBUG_TYPE "SLP"
Nadav Rotem2d9dec32013-04-09 19:44:35 +000010
11#include "VecUtils.h"
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/Analysis/AliasAnalysis.h"
17#include "llvm/Analysis/ScalarEvolution.h"
18#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19#include "llvm/Analysis/TargetTransformInfo.h"
20#include "llvm/Analysis/Verifier.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/Instructions.h"
25#include "llvm/IR/Module.h"
26#include "llvm/IR/Type.h"
27#include "llvm/IR/Value.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/Target/TargetLibraryInfo.h"
33#include "llvm/Transforms/Scalar.h"
34#include "llvm/Transforms/Utils/Local.h"
35#include <algorithm>
36#include <map>
37
38using namespace llvm;
39
Nadav Rotem8543ba32013-04-12 21:16:54 +000040static const unsigned MinVecRegSize = 128;
41
42static const unsigned RecursionMaxDepth = 6;
43
Nadav Rotem2d9dec32013-04-09 19:44:35 +000044namespace llvm {
45
/// Construct a bottom-up SLP vectorizer for the single basic block \p Bb.
/// The analyses (scalar evolution, data layout, target cost model and alias
/// analysis) are borrowed, not owned. Instruction numbering is computed
/// eagerly so ordering queries are valid immediately after construction.
BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
                 TargetTransformInfo *Tti, AliasAnalysis *Aa) :
  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) {
  numberInstructions();
}
51
52void BoUpSLP::numberInstructions() {
53 int Loc = 0;
54 InstrIdx.clear();
55 InstrVec.clear();
56 // Number the instructions in the block.
57 for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
58 InstrIdx[it] = Loc++;
59 InstrVec.push_back(it);
60 assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
61 }
62}
63
64Value *BoUpSLP::getPointerOperand(Value *I) {
65 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
66 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
67 return 0;
68}
69
70unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
71 if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
72 if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
73 return -1;
74}
75
/// Return true if the memory access \p B immediately follows \p A in memory.
/// Both must be loads or stores in the same address space with pointer
/// operands of the same type; the distance is computed symbolically with
/// SCEV, so it also works for non-trivially related pointers.
bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB)) return false;

  // Check that A and B are of the same type.
  if (PtrA->getType() != PtrB->getType()) return false;

  // Calculate the distance.
  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);

  // Non constant distance.
  if (!ConstOffSCEV) return false;

  // Offset = A - B. If B follows A by exactly one element then A - B is
  // -sizeof(element); the unsigned negation below compares against that.
  unsigned Offset = ConstOffSCEV->getValue()->getSExtValue();
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  // The instructions are consecutive if the size of the first load/store is
  // the same as the offset.
  unsigned Sz = DL->getTypeStoreSize(Ty);
  return ((-Offset) == Sz);
}
104
Nadav Rotem8543ba32013-04-12 21:16:54 +0000105bool BoUpSLP::vectorizeStoreChain(ValueList &Chain, int CostThreshold) {
106 Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
107 unsigned Sz = DL->getTypeSizeInBits(StoreTy);
108 unsigned VF = MinVecRegSize / Sz;
109
110 if (!isPowerOf2_32(Sz) || VF < 2) return false;
111
112 bool Changed = false;
113 for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
114 if (i + VF > e) return Changed;
115 DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
116 ValueList Operands(&Chain[i], &Chain[i] + VF);
117
118 int Cost = getTreeCost(Operands);
119 DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
120 if (Cost < CostThreshold) {
121 DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
122 vectorizeTree(Operands, VF);
123 i += VF;
124 Changed = true;
125 }
126 }
127
128 return Changed;
129}
130
/// Link the candidate stores in \p Stores into chains of consecutive
/// accesses and try to vectorize each maximal chain with
/// vectorizeStoreChain(). Returns true if any code was changed.
bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
  ValueSet Heads, Tails;
  SmallDenseMap<Value*, Value*> ConsecutiveChain;

  // We may run into multiple chains that merge into a single chain. We mark the
  // stores that we vectorized so that we don't visit the same store twice.
  ValueSet VectorizedStores;
  bool Changed = false;

  // Do a quadratic search on all of the given stores and find
  // all of the pairs of loads that follow each other.
  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
    for (unsigned j = 0; j < e; ++j) {
      if (i == j) continue;
      if (isConsecutiveAccess(Stores[i], Stores[j])) {
        Tails.insert(Stores[j]);
        Heads.insert(Stores[i]);
        ConsecutiveChain[Stores[i]] = Stores[j];
      }
    }

  // For stores that start but don't end a link in the chain:
  for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
    // A store that is also a tail is in the middle of some chain; skip it.
    if (Tails.count(*it)) continue;

    // We found a store instr that starts a chain. Now follow the chain and try
    // to vectorize it.
    ValueList Operands;
    Value *I = *it;
    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      // Stop when we reach a store that an earlier chain already consumed.
      if (VectorizedStores.count(I)) break;
      Operands.push_back(I);
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
    // Remember which stores were consumed so merging chains skip them.
    if (Vectorized) VectorizedStores.insert(Operands.begin(), Operands.end());
    Changed |= Vectorized;
  }

  return Changed;
}
175
176int BoUpSLP::getScalarizationCost(Type *Ty) {
177 int Cost = 0;
178 for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
179 Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
180 return Cost;
181}
182
183AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
184 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
185 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
186 return AliasAnalysis::Location();
187}
188
189Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
190 assert(Src->getParent() == Dst->getParent() && "Not the same BB");
191 BasicBlock::iterator I = Src, E = Dst;
192 /// Scan all of the instruction from SRC to DST and check if
193 /// the source may alias.
194 for (++I; I != E; ++I) {
195 // Ignore store instructions that are marked as 'ignore'.
196 if (MemBarrierIgnoreList.count(I)) continue;
197 if (Src->mayWriteToMemory()) /* Write */ {
198 if (!I->mayReadOrWriteMemory()) continue;
199 } else /* Read */ {
200 if (!I->mayWriteToMemory()) continue;
201 }
202 AliasAnalysis::Location A = getLocation(&*I);
203 AliasAnalysis::Location B = getLocation(Src);
204
205 if (!A.Ptr || !B.Ptr || AA->alias(A, B))
206 return I;
207 }
208 return 0;
209}
210
/// Return the cost delta (vector cost minus scalar cost) of vectorizing the
/// tree rooted at the values in \p VL; negative means profitable. As a side
/// effect this repopulates LaneMap, MultiUserVals and MustScalarize, which
/// getTreeCost_rec() consults.
int BoUpSLP::getTreeCost(ValueList &VL) {
  // Get rid of the list of stores that were removed, and from the
  // lists of instructions with multiple users.
  MemBarrierIgnoreList.clear();
  LaneMap.clear();
  MultiUserVals.clear();
  MustScalarize.clear();

  // Scan the tree and find which value is used by which lane, and which values
  // must be scalarized.
  getTreeUses_rec(VL, 0);

  // Check that instructions with multiple users can be vectorized. Mark unsafe
  // instructions.
  for (ValueSet::iterator it = MultiUserVals.begin(),
       e = MultiUserVals.end(); it != e; ++it) {
    // Check that all of the users of this instr are within the tree
    // and that they are all from the same lane.
    int Lane = -1;
    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
         I != E; ++I) {
      // A user that the tree walk never saw means an out-of-tree use.
      if (LaneMap.find(*I) == LaneMap.end()) {
        MustScalarize.insert(*it);
        DEBUG(dbgs()<<"SLP: Adding " << **it <<
              " to MustScalarize because of an out of tree usage.\n");
        break;
      }
      // Users spread across different lanes also force scalarization.
      if (Lane == -1) Lane = LaneMap[*I];
      if (Lane != LaneMap[*I]) {
        MustScalarize.insert(*it);
        DEBUG(dbgs()<<"Adding " << **it <<
              " to MustScalarize because multiple lane use it: "
              << Lane << " and " << LaneMap[*I] << ".\n");
        break;
      }
    }
  }

  // Now calculate the cost of vectorizing the tree.
  return getTreeCost_rec(VL, 0);
}
252
253void BoUpSLP::getTreeUses_rec(ValueList &VL, unsigned Depth) {
254 if (Depth == RecursionMaxDepth) return;
255
256 // Don't handle vectors.
257 if (VL[0]->getType()->isVectorTy()) return;
258 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
259 if (SI->getValueOperand()->getType()->isVectorTy()) return;
260
261 // Check if all of the operands are constants.
262 bool AllConst = true;
263 bool AllSameScalar = true;
264 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
265 AllConst &= isa<Constant>(VL[i]);
266 AllSameScalar &= (VL[0] == VL[i]);
267 Instruction *I = dyn_cast<Instruction>(VL[i]);
268 // If one of the instructions is out of this BB, we need to scalarize all.
269 if (I && I->getParent() != BB) return;
270 }
271
272 // If all of the operands are identical or constant we have a simple solution.
273 if (AllConst || AllSameScalar) return;
274
275 // Scalarize unknown structures.
276 Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
277 if (!VL0) return;
278
279 unsigned Opcode = VL0->getOpcode();
280 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
281 Instruction *I = dyn_cast<Instruction>(VL[i]);
282 // If not all of the instructions are identical then we have to scalarize.
283 if (!I || Opcode != I->getOpcode()) return;
284 }
285
286 // Mark instructions with multiple users.
287 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
288 Instruction *I = dyn_cast<Instruction>(VL[i]);
289 // Remember to check if all of the users of this instr are vectorized
290 // within our tree.
291 if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
292 }
293
294 for (int i = 0, e = VL.size(); i < e; ++i) {
295 // Check that the instruction is only used within
296 // one lane.
297 if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
298 // Make this instruction as 'seen' and remember the lane.
299 LaneMap[VL[i]] = i;
300 }
301
302 switch (Opcode) {
303 case Instruction::Add:
304 case Instruction::FAdd:
305 case Instruction::Sub:
306 case Instruction::FSub:
307 case Instruction::Mul:
308 case Instruction::FMul:
309 case Instruction::UDiv:
310 case Instruction::SDiv:
311 case Instruction::FDiv:
312 case Instruction::URem:
313 case Instruction::SRem:
314 case Instruction::FRem:
315 case Instruction::Shl:
316 case Instruction::LShr:
317 case Instruction::AShr:
318 case Instruction::And:
319 case Instruction::Or:
320 case Instruction::Xor: {
321 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
322 ValueList Operands;
323 // Prepare the operand vector.
324 for (unsigned j = 0; j < VL.size(); ++j)
325 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
326
327 getTreeUses_rec(Operands, Depth+1);
328 }
329 }
330 case Instruction::Store: {
331 ValueList Operands;
332 for (unsigned j = 0; j < VL.size(); ++j)
333 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
334 getTreeUses_rec(Operands, Depth+1);
335 return;
336 }
337 default:
338 return;
339 }
340}
341
342int BoUpSLP::getTreeCost_rec(ValueList &VL, unsigned Depth) {
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000343 Type *ScalarTy = VL[0]->getType();
344
345 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
346 ScalarTy = SI->getValueOperand()->getType();
347
348 /// Don't mess with vectors.
349 if (ScalarTy->isVectorTy()) return max_cost;
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000350 VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
351
Nadav Rotem8543ba32013-04-12 21:16:54 +0000352 if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
353
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000354 // Check if all of the operands are constants.
355 bool AllConst = true;
356 bool AllSameScalar = true;
357 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
358 AllConst &= isa<Constant>(VL[i]);
359 AllSameScalar &= (VL[0] == VL[i]);
360 // Must have a single use.
361 Instruction *I = dyn_cast<Instruction>(VL[i]);
Nadav Rotem8543ba32013-04-12 21:16:54 +0000362 // This instruction is outside the basic block or if it is a known hazard.
363 if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000364 return getScalarizationCost(VecTy);
365 }
366
367 // Is this a simple vector constant.
368 if (AllConst) return 0;
369
370 // If all of the operands are identical we can broadcast them.
371 if (AllSameScalar)
372 return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
373
374 // Scalarize unknown structures.
375 Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
376 if (!VL0) return getScalarizationCost(VecTy);
377 assert(VL0->getParent() == BB && "Wrong BB");
378
379 unsigned Opcode = VL0->getOpcode();
380 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
381 Instruction *I = dyn_cast<Instruction>(VL[i]);
382 // If not all of the instructions are identical then we have to scalarize.
383 if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
384 }
385
386 // Check if it is safe to sink the loads or the stores.
387 if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
388 int MaxIdx = InstrIdx[VL0];
389 for (unsigned i = 1, e = VL.size(); i < e; ++i )
390 MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
391
392 Instruction *Last = InstrVec[MaxIdx];
393 for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
394 if (VL[i] == Last) continue;
395 Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
396 if (Barrier) {
Nadav Rotem8543ba32013-04-12 21:16:54 +0000397 DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000398 *Last << "\n because of " << *Barrier << "\n");
399 return max_cost;
400 }
401 }
402 }
403
404 switch (Opcode) {
405 case Instruction::Add:
406 case Instruction::FAdd:
407 case Instruction::Sub:
408 case Instruction::FSub:
409 case Instruction::Mul:
410 case Instruction::FMul:
411 case Instruction::UDiv:
412 case Instruction::SDiv:
413 case Instruction::FDiv:
414 case Instruction::URem:
415 case Instruction::SRem:
416 case Instruction::FRem:
417 case Instruction::Shl:
418 case Instruction::LShr:
419 case Instruction::AShr:
420 case Instruction::And:
421 case Instruction::Or:
422 case Instruction::Xor: {
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000423 int Cost = 0;
424 // Calculate the cost of all of the operands.
425 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
Nadav Rotem8543ba32013-04-12 21:16:54 +0000426 ValueList Operands;
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000427 // Prepare the operand vector.
428 for (unsigned j = 0; j < VL.size(); ++j)
429 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
Nadav Rotem8543ba32013-04-12 21:16:54 +0000430
431 Cost += getTreeCost_rec(Operands, Depth+1);
432 if (Cost >= max_cost) return max_cost;
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000433 }
434
435 // Calculate the cost of this instruction.
436 int ScalarCost = VecTy->getNumElements() *
437 TTI->getArithmeticInstrCost(Opcode, ScalarTy);
Nadav Rotem8543ba32013-04-12 21:16:54 +0000438
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000439 int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
440 Cost += (VecCost - ScalarCost);
441 return Cost;
442 }
443 case Instruction::Load: {
444 // If we are scalarize the loads, add the cost of forming the vector.
445 for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
446 if (!isConsecutiveAccess(VL[i], VL[i+1]))
447 return getScalarizationCost(VecTy);
448
449 // Cost of wide load - cost of scalar loads.
450 int ScalarLdCost = VecTy->getNumElements() *
451 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
452 int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
453 return VecLdCost - ScalarLdCost;
454 }
455 case Instruction::Store: {
456 // We know that we can merge the stores. Calculate the cost.
457 int ScalarStCost = VecTy->getNumElements() *
458 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
459 int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
460 int StoreCost = VecStCost - ScalarStCost;
461
462 ValueList Operands;
463 for (unsigned j = 0; j < VL.size(); ++j) {
464 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
465 MemBarrierIgnoreList.insert(VL[j]);
466 }
467
Nadav Rotem8543ba32013-04-12 21:16:54 +0000468 int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000469 return TotalCost;
470 }
471 default:
472 // Unable to vectorize unknown instructions.
473 return getScalarizationCost(VecTy);
474 }
475}
476
477Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
478 int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
479 for (unsigned i = 0; i < VF; ++i )
480 MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
481 return InstrVec[MaxIdx + 1];
482}
483
484Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
485 IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
486 Value *Vec = UndefValue::get(Ty);
487 for (unsigned i=0; i < Ty->getNumElements(); ++i)
488 Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
489 return Vec;
490}
491
492Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
Nadav Rotem8543ba32013-04-12 21:16:54 +0000493 Value *V = vectorizeTree_rec(VL, VF);
494 // We moved some instructions around. We have to number them again
495 // before we can do any analysis.
496 numberInstructions();
497 MustScalarize.clear();
498 return V;
499}
500
/// Recursively emit vector code for the tree rooted at \p VL with vector
/// factor \p VF. Nodes that cannot be vectorized are materialized with
/// Scalarize(). Returns the resulting vector value (null for stores, which
/// produce no value and erase their scalar counterparts).
Value *BoUpSLP::vectorizeTree_rec(ValueList &VL, int VF) {
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= !!dyn_cast<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // The instruction must be in the same BB, and it must be vectorizable.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // Check that this is a simple vector constant.
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return Scalarize(VL, VecTy);

  // Reuse the vector that was already generated for this subtree.
  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    ValueList LHSVL, RHSVL;
    // NOTE(review): RHSVL holds operand 0 and LHSVL operand 1, and they are
    // passed to CreateBinOp in (RHS, LHS) order — so despite the swapped
    // names the original operand order is preserved.
    for (int i = 0; i < VF; ++i) {
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
    }

    Value *RHS = vectorizeTree_rec(RHSVL, VF);
    Value *LHS = vectorizeTree_rec(LHSVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
    // Cache so other lanes/users of this subtree reuse the same vector.
    VectorizedValues[VL0] = V;
    return V;
  }
  case Instruction::Load: {
    LoadInst *LI = dyn_cast<LoadInst>(VL0);
    unsigned Alignment = LI->getAlignment();

    // Check if all of the loads are consecutive.
    for (unsigned i = 1, e = VF; i < e; ++i)
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
        return Scalarize(VL, VecTy);

    // Emit one wide load through a bitcast of the first pointer, keeping
    // the original scalar alignment.
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                          VecTy->getPointerTo());
    LI = Builder.CreateLoad(VecPtr);
    LI->setAlignment(Alignment);
    VectorizedValues[VL0] = LI;
    return LI;
  }
  case Instruction::Store: {
    StoreInst *SI = dyn_cast<StoreInst>(VL0);
    unsigned Alignment = SI->getAlignment();

    // Vectorize the stored values first.
    ValueList ValueOp;
    for (int i = 0; i < VF; ++i)
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

    Value *VecValue = vectorizeTree_rec(ValueOp, VF);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                          VecTy->getPointerTo());
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

    // The scalar stores are now dead; remove them.
    for (int i = 0; i < VF; ++i)
      cast<Instruction>(VL[i])->eraseFromParent();
    return 0;
  }
  default:
    // Unable to vectorize this node; gather the scalars into a vector.
    Value *S = Scalarize(VL, VecTy);
    VectorizedValues[VL0] = S;
    return S;
  }
}
610
611} // end of namespace