blob: 3efaaf6edd183304283ee66d4585f36d93667e82 [file] [log] [blame]
Nadav Rotem2d9dec32013-04-09 19:44:35 +00001//===- VecUtils.h --- Vectorization Utilities -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#define DEBUG_TYPE "VecUtils"
10
11#include "VecUtils.h"
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/Analysis/AliasAnalysis.h"
17#include "llvm/Analysis/ScalarEvolution.h"
18#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19#include "llvm/Analysis/TargetTransformInfo.h"
20#include "llvm/Analysis/Verifier.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/Instructions.h"
25#include "llvm/IR/Module.h"
26#include "llvm/IR/Type.h"
27#include "llvm/IR/Value.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/Target/TargetLibraryInfo.h"
33#include "llvm/Transforms/Scalar.h"
34#include "llvm/Transforms/Utils/Local.h"
35#include <algorithm>
36#include <map>
37
38using namespace llvm;
39
40namespace llvm {
41
42BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
43 TargetTransformInfo *Tti, AliasAnalysis *Aa) :
44 BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) {
45 numberInstructions();
46}
47
48void BoUpSLP::numberInstructions() {
49 int Loc = 0;
50 InstrIdx.clear();
51 InstrVec.clear();
52 // Number the instructions in the block.
53 for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
54 InstrIdx[it] = Loc++;
55 InstrVec.push_back(it);
56 assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
57 }
58}
59
60Value *BoUpSLP::getPointerOperand(Value *I) {
61 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
62 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
63 return 0;
64}
65
66unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
67 if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
68 if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
69 return -1;
70}
71
72bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
73 Value *PtrA = getPointerOperand(A);
74 Value *PtrB = getPointerOperand(B);
75 unsigned ASA = getAddressSpaceOperand(A);
76 unsigned ASB = getAddressSpaceOperand(B);
77
78 // Check that the address spaces match and that the pointers are valid.
79 if (!PtrA || !PtrB || (ASA != ASB)) return false;
80
81 // Check that A and B are of the same type.
82 if (PtrA->getType() != PtrB->getType()) return false;
83
84 // Calculate the distance.
85 const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
86 const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
87 const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
88 const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
89
90 // Non constant distance.
91 if (!ConstOffSCEV) return false;
92
93 unsigned Offset = ConstOffSCEV->getValue()->getSExtValue();
94 Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
95 // The Instructions are connsecutive if the size of the first load/store is
96 // the same as the offset.
Nadav Rotem88dd5f72013-04-10 18:57:27 +000097 unsigned Sz = DL->getTypeStoreSize(Ty);
Nadav Rotem2d9dec32013-04-09 19:44:35 +000098 return ((-Offset) == Sz);
99}
100
101bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
102 ValueSet Heads, Tails;
103 SmallDenseMap<Value*, Value*> ConsecutiveChain;
104 bool Changed = false;
105
106 // Do a quadratic search on all of the given stores and find
107 // all of the pairs of loads that follow each other.
108 for (unsigned i = 0, e = Stores.size(); i < e; ++i)
109 for (unsigned j = 0; j < e; ++j) {
110 if (i == j) continue;
111 if (isConsecutiveAccess(Stores[i], Stores[j])) {
112 Tails.insert(Stores[j]);
113 Heads.insert(Stores[i]);
114 ConsecutiveChain[Stores[i]] = Stores[j];
115 }
116 }
117
118 // For stores that start but don't end a link in the chain:
119 for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
120 if (Tails.count(*it)) continue;
121
122 // We found a store instr that starts a chain. Now follow the chain and try
123 // to vectorize it.
124 ValueList Operands;
125 Value *I = *it;
126 int MinCost = 0, MinVF = 0;
127 while (Tails.count(I) || Heads.count(I)) {
128 Operands.push_back(I);
129 unsigned VF = Operands.size();
130 if (isPowerOf2_32(VF) && VF > 1) {
131 int cost = getTreeRollCost(Operands, 0);
132 DEBUG(dbgs() << "Found cost=" << cost << " for VF=" << VF << "\n");
133 if (cost < MinCost) { MinCost = cost; MinVF = VF; }
134 }
135 // Move to the next value in the chain.
136 I = ConsecutiveChain[I];
137 }
138
139 if (MinCost <= costThreshold && MinVF > 1) {
140 DEBUG(dbgs() << "Decided to vectorize cost=" << MinCost << "\n");
141 vectorizeTree(Operands, MinVF);
142 Stores.clear();
143 // The current numbering is invalid because we added and removed instrs.
144 numberInstructions();
145 Changed = true;
146 }
147 }
148
149 return Changed;
150}
151
152int BoUpSLP::getScalarizationCost(Type *Ty) {
153 int Cost = 0;
154 for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
155 Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
156 return Cost;
157}
158
159AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
160 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
161 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
162 return AliasAnalysis::Location();
163}
164
165Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
166 assert(Src->getParent() == Dst->getParent() && "Not the same BB");
167 BasicBlock::iterator I = Src, E = Dst;
168 /// Scan all of the instruction from SRC to DST and check if
169 /// the source may alias.
170 for (++I; I != E; ++I) {
171 // Ignore store instructions that are marked as 'ignore'.
172 if (MemBarrierIgnoreList.count(I)) continue;
173 if (Src->mayWriteToMemory()) /* Write */ {
174 if (!I->mayReadOrWriteMemory()) continue;
175 } else /* Read */ {
176 if (!I->mayWriteToMemory()) continue;
177 }
178 AliasAnalysis::Location A = getLocation(&*I);
179 AliasAnalysis::Location B = getLocation(Src);
180
181 if (!A.Ptr || !B.Ptr || AA->alias(A, B))
182 return I;
183 }
184 return 0;
185}
186
187int BoUpSLP::getTreeRollCost(ValueList &VL, unsigned Depth) {
188 if (Depth == 6) return max_cost;
189 Type *ScalarTy = VL[0]->getType();
190
191 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
192 ScalarTy = SI->getValueOperand()->getType();
193
194 /// Don't mess with vectors.
195 if (ScalarTy->isVectorTy()) return max_cost;
196
197 VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
198
199 // Check if all of the operands are constants.
200 bool AllConst = true;
201 bool AllSameScalar = true;
202 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
203 AllConst &= isa<Constant>(VL[i]);
204 AllSameScalar &= (VL[0] == VL[i]);
205 // Must have a single use.
206 Instruction *I = dyn_cast<Instruction>(VL[i]);
207 // Need to scalarize instructions with multiple users or from other BBs.
208 if (I && ((I->getNumUses() > 1) || (I->getParent() != BB)))
209 return getScalarizationCost(VecTy);
210 }
211
212 // Is this a simple vector constant.
213 if (AllConst) return 0;
214
215 // If all of the operands are identical we can broadcast them.
216 if (AllSameScalar)
217 return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
218
219 // Scalarize unknown structures.
220 Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
221 if (!VL0) return getScalarizationCost(VecTy);
222 assert(VL0->getParent() == BB && "Wrong BB");
223
224 unsigned Opcode = VL0->getOpcode();
225 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
226 Instruction *I = dyn_cast<Instruction>(VL[i]);
227 // If not all of the instructions are identical then we have to scalarize.
228 if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
229 }
230
231 // Check if it is safe to sink the loads or the stores.
232 if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
233 int MaxIdx = InstrIdx[VL0];
234 for (unsigned i = 1, e = VL.size(); i < e; ++i )
235 MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
236
237 Instruction *Last = InstrVec[MaxIdx];
238 for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
239 if (VL[i] == Last) continue;
240 Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
241 if (Barrier) {
242 DEBUG(dbgs() << "LR: Can't sink " << *VL[i] << "\n down to " <<
243 *Last << "\n because of " << *Barrier << "\n");
244 return max_cost;
245 }
246 }
247 }
248
249 switch (Opcode) {
250 case Instruction::Add:
251 case Instruction::FAdd:
252 case Instruction::Sub:
253 case Instruction::FSub:
254 case Instruction::Mul:
255 case Instruction::FMul:
256 case Instruction::UDiv:
257 case Instruction::SDiv:
258 case Instruction::FDiv:
259 case Instruction::URem:
260 case Instruction::SRem:
261 case Instruction::FRem:
262 case Instruction::Shl:
263 case Instruction::LShr:
264 case Instruction::AShr:
265 case Instruction::And:
266 case Instruction::Or:
267 case Instruction::Xor: {
268 ValueList Operands;
269 int Cost = 0;
270 // Calculate the cost of all of the operands.
271 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
272 // Prepare the operand vector.
273 for (unsigned j = 0; j < VL.size(); ++j)
274 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
275 Cost += getTreeRollCost(Operands, Depth+1);
276 Operands.clear();
277 }
278
279 // Calculate the cost of this instruction.
280 int ScalarCost = VecTy->getNumElements() *
281 TTI->getArithmeticInstrCost(Opcode, ScalarTy);
282 int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
283 Cost += (VecCost - ScalarCost);
284 return Cost;
285 }
286 case Instruction::Load: {
287 // If we are scalarize the loads, add the cost of forming the vector.
288 for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
289 if (!isConsecutiveAccess(VL[i], VL[i+1]))
290 return getScalarizationCost(VecTy);
291
292 // Cost of wide load - cost of scalar loads.
293 int ScalarLdCost = VecTy->getNumElements() *
294 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
295 int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
296 return VecLdCost - ScalarLdCost;
297 }
298 case Instruction::Store: {
299 // We know that we can merge the stores. Calculate the cost.
300 int ScalarStCost = VecTy->getNumElements() *
301 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
302 int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
303 int StoreCost = VecStCost - ScalarStCost;
304
305 ValueList Operands;
306 for (unsigned j = 0; j < VL.size(); ++j) {
307 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
308 MemBarrierIgnoreList.insert(VL[j]);
309 }
310
311 int TotalCost = StoreCost + getTreeRollCost(Operands, Depth + 1);
312 MemBarrierIgnoreList.clear();
313 return TotalCost;
314 }
315 default:
316 // Unable to vectorize unknown instructions.
317 return getScalarizationCost(VecTy);
318 }
319}
320
321Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
322 int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
323 for (unsigned i = 0; i < VF; ++i )
324 MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
325 return InstrVec[MaxIdx + 1];
326}
327
328Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
329 IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
330 Value *Vec = UndefValue::get(Ty);
331 for (unsigned i=0; i < Ty->getNumElements(); ++i)
332 Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
333 return Vec;
334}
335
336Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
337 Type *ScalarTy = VL[0]->getType();
338 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
339 ScalarTy = SI->getValueOperand()->getType();
340 VectorType *VecTy = VectorType::get(ScalarTy, VF);
341
342 // Check if all of the operands are constants or identical.
343 bool AllConst = true;
344 bool AllSameScalar = true;
345 for (unsigned i = 0, e = VF; i < e; ++i) {
346 AllConst &= !!dyn_cast<Constant>(VL[i]);
347 AllSameScalar &= (VL[0] == VL[i]);
348 // Must have a single use.
349 Instruction *I = dyn_cast<Instruction>(VL[i]);
350 if (I && (I->getNumUses() > 1 || I->getParent() != BB))
351 return Scalarize(VL, VecTy);
352 }
353
354 // Is this a simple vector constant.
355 if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
356
357 // Scalarize unknown structures.
358 Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
359 if (!VL0) return Scalarize(VL, VecTy);
360
361 unsigned Opcode = VL0->getOpcode();
362 for (unsigned i = 0, e = VF; i < e; ++i) {
363 Instruction *I = dyn_cast<Instruction>(VL[i]);
364 // If not all of the instructions are identical then we have to scalarize.
365 if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
366 }
367
368 switch (Opcode) {
369 case Instruction::Add:
370 case Instruction::FAdd:
371 case Instruction::Sub:
372 case Instruction::FSub:
373 case Instruction::Mul:
374 case Instruction::FMul:
375 case Instruction::UDiv:
376 case Instruction::SDiv:
377 case Instruction::FDiv:
378 case Instruction::URem:
379 case Instruction::SRem:
380 case Instruction::FRem:
381 case Instruction::Shl:
382 case Instruction::LShr:
383 case Instruction::AShr:
384 case Instruction::And:
385 case Instruction::Or:
386 case Instruction::Xor: {
387 ValueList LHSVL, RHSVL;
388 for (int i = 0; i < VF; ++i) {
389 RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
390 LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
391 }
392
393 Value *RHS = vectorizeTree(RHSVL, VF);
394 Value *LHS = vectorizeTree(LHSVL, VF);
395 IRBuilder<> Builder(GetLastInstr(VL, VF));
396 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
397 return Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
398 }
399 case Instruction::Load: {
400 LoadInst *LI = dyn_cast<LoadInst>(VL0);
401 unsigned Alignment = LI->getAlignment();
402
403 // Check if all of the loads are consecutive.
404 for (unsigned i = 1, e = VF; i < e; ++i)
405 if (!isConsecutiveAccess(VL[i-1], VL[i]))
406 return Scalarize(VL, VecTy);
407
408 IRBuilder<> Builder(GetLastInstr(VL, VF));
409 Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
410 VecTy->getPointerTo());
411 LI = Builder.CreateLoad(VecPtr);
412 LI->setAlignment(Alignment);
413 return LI;
414 }
415 case Instruction::Store: {
416 StoreInst *SI = dyn_cast<StoreInst>(VL0);
417 unsigned Alignment = SI->getAlignment();
418
419 ValueList ValueOp;
420 for (int i = 0; i < VF; ++i)
421 ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
422
423 Value *VecValue = vectorizeTree(ValueOp, VF);
424
425 IRBuilder<> Builder(GetLastInstr(VL, VF));
426 Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
427 VecTy->getPointerTo());
428 Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
429
430 for (int i = 0; i < VF; ++i)
431 cast<Instruction>(VL[i])->eraseFromParent();
432 return 0;
433 }
434 default:
435 return Scalarize(VL, VecTy);
436 }
437}
438
439} // end of namespace