//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "SLP"

#include "VecUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <map>

using namespace llvm;

static const unsigned MinVecRegSize = 128;

static const unsigned RecursionMaxDepth = 6;

namespace llvm {

BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
                 TargetTransformInfo *Tti, AliasAnalysis *Aa) :
  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) {
  numberInstructions();
}

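// Number the instructions in the block so that we can cheaply compare the
// relative order of any two instructions (see GetLastInstr).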
void BoUpSLP::numberInstructions() {
  int Loc = 0;
  InstrIdx.clear();
  InstrVec.clear();
  // Number the instructions in the block.
  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    InstrIdx[it] = Loc++;
    InstrVec.push_back(it);
    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
  }
}

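// Return the pointer operand of a load or store, or null for other values.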
Value *BoUpSLP::getPointerOperand(Value *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
  return 0;
}

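// Return the address space of a load or store pointer, or -1 (as unsigned)
// for values that access no memory.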
unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
  return -1;
}

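// Return true if B accesses the memory location that immediately follows A,
// using ScalarEvolution to compute the constant distance between the two
// pointers.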
bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB)) return false;

  // Check that A and B are of the same type.
  if (PtrA->getType() != PtrB->getType()) return false;

  // Calculate the distance.
  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);

  // Non-constant distance.
  if (!ConstOffSCEV) return false;

  unsigned Offset = ConstOffSCEV->getValue()->getSExtValue();
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  // The accesses are consecutive if the distance (PtrA - PtrB) equals minus
  // the store size of the first load/store.
  unsigned Sz = DL->getTypeStoreSize(Ty);
  return ((-Offset) == Sz);
}

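// Try to vectorize a chain of consecutive stores in bundles of VF elements,
// where VF is the number of scalars that fit in a MinVecRegSize-bit register.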
bool BoUpSLP::vectorizeStoreChain(ValueList &Chain, int CostThreshold) {
  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
  unsigned VF = MinVecRegSize / Sz;

  if (!isPowerOf2_32(Sz) || VF < 2) return false;

  bool Changed = false;
  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
    if (i + VF > e) return Changed;
    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n");
    ValueList Operands(&Chain[i], &Chain[i] + VF);

    int Cost = getTreeCost(Operands);
    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
    if (Cost < CostThreshold) {
      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
      vectorizeTree(Operands, VF);
      // Skip over the stores that we just vectorized; the loop increment
      // advances past the last element of the bundle.
      i += VF - 1;
      Changed = true;
    }
  }

  return Changed;
}

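// Find chains of consecutive stores in the given list and try to vectorize
// each chain.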
bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
  ValueSet Heads, Tails;
  SmallDenseMap<Value*, Value*> ConsecutiveChain;

  // We may run into multiple chains that merge into a single chain. We mark
  // the stores that we vectorized so that we don't visit the same store twice.
  ValueSet VectorizedStores;
  bool Changed = false;

  // Do a quadratic search on all of the given stores and find all of the
  // pairs of stores that follow each other.
  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
    for (unsigned j = 0; j < e; ++j) {
      if (i == j) continue;
      if (isConsecutiveAccess(Stores[i], Stores[j])) {
        Tails.insert(Stores[j]);
        Heads.insert(Stores[i]);
        ConsecutiveChain[Stores[i]] = Stores[j];
      }
    }

  // For stores that start but don't end a link in the chain:
  for (ValueSet::iterator it = Heads.begin(), e = Heads.end(); it != e; ++it) {
    if (Tails.count(*it)) continue;

    // We found a store instr that starts a chain. Now follow the chain and try
    // to vectorize it.
    ValueList Operands;
    Value *I = *it;
    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      if (VectorizedStores.count(I)) break;
      Operands.push_back(I);
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
    if (Vectorized) VectorizedStores.insert(Operands.begin(), Operands.end());
    Changed |= Vectorized;
  }

  return Changed;
}

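// Return the cost of gathering the scalar values in VL into a vector.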
int BoUpSLP::getScalarizationCost(ValueList &VL) {
  Type *ScalarTy = VL[0]->getType();

  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();

  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
  return getScalarizationCost(VecTy);
}

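// Sum the target's cost of inserting one scalar into each lane of the given
// vector type.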
Nadav Rotem2d9dec32013-04-09 19:44:35 +0000186int BoUpSLP::getScalarizationCost(Type *Ty) {
187 int Cost = 0;
188 for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
189 Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
190 return Cost;
191}
192
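// Return the alias-analysis location of a load or store, or an empty
// location for other instructions.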
AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
  return AliasAnalysis::Location();
}

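// Return the first instruction between Src and Dst that may alias Src and
// therefore blocks sinking Src down to Dst, or null if sinking is safe.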
Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
  BasicBlock::iterator I = Src, E = Dst;
  // Scan all of the instructions between Src and Dst and check if any of
  // them may alias the source.
  for (++I; I != E; ++I) {
    // Ignore store instructions that are marked as 'ignore'.
    if (MemBarrierIgnoreList.count(I)) continue;
    if (Src->mayWriteToMemory()) /* Write */ {
      if (!I->mayReadOrWriteMemory()) continue;
    } else /* Read */ {
      if (!I->mayWriteToMemory()) continue;
    }
    AliasAnalysis::Location A = getLocation(&*I);
    AliasAnalysis::Location B = getLocation(Src);

    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
      return I;
  }
  return 0;
}

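// Vectorize a group of independent scalar arithmetic instructions and
// replace each scalar with an extract from the new vector.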
void BoUpSLP::vectorizeArith(ValueList &Operands) {
  Value *Vec = vectorizeTree(Operands, Operands.size());
  BasicBlock::iterator Loc = cast<Instruction>(Vec);
  IRBuilder<> Builder(++Loc);
  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
    Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
    Operands[i]->replaceAllUsesWith(S);
  }
}

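// Estimate the cost of vectorizing the tree rooted at VL (vector cost minus
// scalar cost, so lower is better). Callers compare the result against a
// cost threshold.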
int BoUpSLP::getTreeCost(ValueList &VL) {
  // Clear the state left over from the previous query: the list of stores
  // to ignore and the lists of instructions with multiple users.
  MemBarrierIgnoreList.clear();
  LaneMap.clear();
  MultiUserVals.clear();
  MustScalarize.clear();

  // Scan the tree and find which value is used by which lane, and which values
  // must be scalarized.
  getTreeUses_rec(VL, 0);

  // Check that instructions with multiple users can be vectorized. Mark unsafe
  // instructions.
  for (ValueSet::iterator it = MultiUserVals.begin(),
       e = MultiUserVals.end(); it != e; ++it) {
    // Check that all of the users of this instr are within the tree
    // and that they are all from the same lane.
    int Lane = -1;
    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
         I != E; ++I) {
      if (LaneMap.find(*I) == LaneMap.end()) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because of an out of tree usage.\n");
        break;
      }
      if (Lane == -1) Lane = LaneMap[*I];
      if (Lane != LaneMap[*I]) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because multiple lanes use it: "
              << Lane << " and " << LaneMap[*I] << ".\n");
        break;
      }
    }
  }

  // Now calculate the cost of vectorizing the tree.
  return getTreeCost_rec(VL, 0);
}

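// Walk the would-be vectorization tree, record which lane each in-tree value
// feeds, and collect values with multiple users so that getTreeCost can
// decide which of them must be scalarized.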
void BoUpSLP::getTreeUses_rec(ValueList &VL, unsigned Depth) {
  if (Depth == RecursionMaxDepth) return;

  // Don't handle vectors.
  if (VL[0]->getType()->isVectorTy()) return;
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    if (SI->getValueOperand()->getType()->isVectorTy()) return;

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If one of the instructions is out of this BB, we need to scalarize all.
    if (I && I->getParent() != BB) return;
  }

  // If all of the operands are identical or constant we have a simple solution.
  if (AllConst || AllSameScalar) return;

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return;

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return;
  }

  // Mark instructions with multiple users.
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Remember to check if all of the users of this instr are vectorized
    // within our tree.
    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
  }

  for (int i = 0, e = VL.size(); i < e; ++i) {
    // Check that the instruction is only used within
    // one lane.
    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
    // Mark this instruction as 'seen' and remember the lane.
    LaneMap[VL[i]] = i;
  }

  switch (Opcode) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
        ValueList Operands;
        // Prepare the operand vector.
        for (unsigned j = 0; j < VL.size(); ++j)
          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

        getTreeUses_rec(Operands, Depth+1);
      }
      // Don't fall through into the store case below.
      return;
    }
    case Instruction::Store: {
      ValueList Operands;
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      getTreeUses_rec(Operands, Depth+1);
      return;
    }
    default:
      return;
  }
}

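// Recursively compute the profitability of vectorizing the tree rooted at
// VL, returning max_cost when the bundle cannot be vectorized at all.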
int BoUpSLP::getTreeCost_rec(ValueList &VL, unsigned Depth) {
  Type *ScalarTy = VL[0]->getType();

  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();

  // Don't mess with vectors.
  if (ScalarTy->isVectorTy()) return max_cost;
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());

  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Scalarize if this instruction is outside the basic block or if it is a
    // known hazard.
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
      return getScalarizationCost(VecTy);
  }

  // Is this a simple vector constant?
  if (AllConst) return 0;

  // If all of the operands are identical we can broadcast them.
  if (AllSameScalar)
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return getScalarizationCost(VecTy);
  assert(VL0->getParent() == BB && "Wrong BB");

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
  }

  // Check if it is safe to sink the loads or the stores.
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
    int MaxIdx = InstrIdx[VL0];
    for (unsigned i = 1, e = VL.size(); i < e; ++i)
      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);

    Instruction *Last = InstrVec[MaxIdx];
    for (unsigned i = 0, e = VL.size(); i < e; ++i) {
      if (VL[i] == Last) continue;
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
      if (Barrier) {
        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
              *Last << "\n because of " << *Barrier << "\n");
        return max_cost;
      }
    }
  }

  switch (Opcode) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
      int Cost = 0;
      // Calculate the cost of all of the operands.
      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
        ValueList Operands;
        // Prepare the operand vector.
        for (unsigned j = 0; j < VL.size(); ++j)
          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

        Cost += getTreeCost_rec(Operands, Depth+1);
        if (Cost >= max_cost) return max_cost;
      }

      // Calculate the cost of this instruction.
      int ScalarCost = VecTy->getNumElements() *
        TTI->getArithmeticInstrCost(Opcode, ScalarTy);

      int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
      Cost += (VecCost - ScalarCost);
      return Cost;
    }
    case Instruction::Load: {
      // If we have to scalarize the loads, add the cost of forming the vector.
      for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
        if (!isConsecutiveAccess(VL[i], VL[i+1]))
          return getScalarizationCost(VecTy);

      // Cost of wide load - cost of scalar loads.
      int ScalarLdCost = VecTy->getNumElements() *
        TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
      return VecLdCost - ScalarLdCost;
    }
    case Instruction::Store: {
      // We know that we can merge the stores. Calculate the cost.
      int ScalarStCost = VecTy->getNumElements() *
        TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
      int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
      int StoreCost = VecStCost - ScalarStCost;

      ValueList Operands;
      for (unsigned j = 0; j < VL.size(); ++j) {
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
        MemBarrierIgnoreList.insert(VL[j]);
      }

      int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
      return TotalCost;
    }
    default:
      // Unable to vectorize unknown instructions.
      return getScalarizationCost(VecTy);
  }
}

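// Return the instruction that follows the bottom-most of the first VF values
// in VL, in block order. This is where new vector instructions are inserted.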
Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
  for (unsigned i = 0; i < VF; ++i)
    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
  return InstrVec[MaxIdx + 1];
}

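// Gather the scalars in VL into a vector using a sequence of insertelement
// instructions.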
Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
  Value *Vec = UndefValue::get(Ty);
  for (unsigned i = 0; i < Ty->getNumElements(); ++i)
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
  return Vec;
}

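// Vectorize the tree rooted at VL, then renumber the instructions and clear
// the per-query scalarization state.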
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
  Value *V = vectorizeTree_rec(VL, VF);
  // We moved some instructions around. We have to number them again
  // before we can do any analysis.
  numberInstructions();
  MustScalarize.clear();
  return V;
}

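// Recursively emit vector instructions for the tree rooted at VL. Bundles
// that cannot be vectorized are gathered with Scalarize.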
Value *BoUpSLP::vectorizeTree_rec(ValueList &VL, int VF) {
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // The instruction must be in the same BB, and it must be vectorizable.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // Gather simple vector constants and broadcasts with insertelements.
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return Scalarize(VL, VecTy);

  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
      ValueList LHSVL, RHSVL;
      for (int i = 0; i < VF; ++i) {
        LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
        RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
      }

      Value *LHS = vectorizeTree_rec(LHSVL, VF);
      Value *RHS = vectorizeTree_rec(RHSVL, VF);
      IRBuilder<> Builder(GetLastInstr(VL, VF));
      BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
      VectorizedValues[VL0] = V;
      return V;
    }
    case Instruction::Load: {
      LoadInst *LI = cast<LoadInst>(VL0);
      unsigned Alignment = LI->getAlignment();

      // Check if all of the loads are consecutive.
      for (unsigned i = 1, e = VF; i < e; ++i)
        if (!isConsecutiveAccess(VL[i-1], VL[i]))
          return Scalarize(VL, VecTy);

      IRBuilder<> Builder(GetLastInstr(VL, VF));
      Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                            VecTy->getPointerTo());
      LI = Builder.CreateLoad(VecPtr);
      LI->setAlignment(Alignment);
      VectorizedValues[VL0] = LI;
      return LI;
    }
    case Instruction::Store: {
      StoreInst *SI = cast<StoreInst>(VL0);
      unsigned Alignment = SI->getAlignment();

      ValueList ValueOp;
      for (int i = 0; i < VF; ++i)
        ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

      Value *VecValue = vectorizeTree_rec(ValueOp, VF);

      IRBuilder<> Builder(GetLastInstr(VL, VF));
      Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                            VecTy->getPointerTo());
      Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

      for (int i = 0; i < VF; ++i)
        cast<Instruction>(VL[i])->eraseFromParent();
      return 0;
    }
    default:
      Value *S = Scalarize(VL, VecTy);
      VectorizedValues[VL0] = S;
      return S;
  }
}

} // end of namespace llvm