//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Constants.h"

using namespace llvm;
using namespace llvm::PatternMatch;

/// \brief Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
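///
/// For example (illustrative), the llvm.sqrt intrinsic has matching scalar
/// and vector forms:
///   declare float       @llvm.sqrt.f32(float)
///   declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
/// so Intrinsic::sqrt is reported as trivially vectorizable.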
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::bswap:
  case Intrinsic::ctpop:
  case Intrinsic::pow:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return true;
  default:
    return false;
  }
}

/// \brief Identifies if the intrinsic has a scalar operand. It checks for
/// the ctlz, cttz and powi intrinsics, whose second argument is scalar.
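///
/// For example (illustrative), in
///   declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
/// the i1 flag at operand index 1 must remain scalar, so this returns true
/// for (Intrinsic::ctlz, 1) and false for any other operand index.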
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  default:
    return false;
  }
}

/// \brief Returns intrinsic ID for call.
/// For the given call instruction it finds the mapping intrinsic and returns
/// its ID; if no mapping is found, it returns not_intrinsic.
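///
/// For example (illustrative), a call such as
///   %r = call float @llvm.sqrt.f32(float %x)
/// maps to Intrinsic::sqrt, which is trivially vectorizable and therefore
/// returned; a call with no vectorizable mapping yields not_intrinsic.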
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
  if (ID == Intrinsic::not_intrinsic)
    return Intrinsic::not_intrinsic;

  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
    return ID;
  return Intrinsic::not_intrinsic;
}

/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
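///
/// For example (illustrative), in
///   %p = getelementptr { i32 }, { i32 }* %base, i64 %i, i32 0
/// the trailing zero index does not change how the address advances with %i,
/// so operand index 1 (%i) is the one to check.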
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
  const DataLayout &DL = Gep->getModule()->getDataLayout();
  unsigned LastOperand = Gep->getNumOperands() - 1;
  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());

  // Walk backwards and try to peel off zeros.
  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
    // Find the type we're currently indexing into.
    gep_type_iterator GEPTI = gep_type_begin(Gep);
    std::advance(GEPTI, LastOperand - 1);

    // If it's a type with the same allocation size as the result of the GEP we
    // can peel off the zero index.
    if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
      break;
    --LastOperand;
  }

  return LastOperand;
}

/// \brief If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns the original pointer instead.
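///
/// For example (illustrative), for the GEP shown above this returns %i; if
/// any other index were non-loop-invariant, the original pointer would be
/// returned instead.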
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  unsigned InductionOperand = getGEPInductionOperand(GEP);

  // Check that all of the gep indices are uniform except for our induction
  // operand.
  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
    if (i != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
      return Ptr;
  return GEP->getOperand(InductionOperand);
}

/// \brief If a value has only one user that is a CastInst, return it.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
  Value *UniqueCast = nullptr;
  for (User *U : Ptr->users()) {
    CastInst *CI = dyn_cast<CastInst>(U);
    if (CI && CI->getType() == Ty) {
      if (!UniqueCast)
        UniqueCast = CI;
      else
        return nullptr;
    }
  }
  return UniqueCast;
}

/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
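///
/// For example (illustrative), for an access like a[i * %stride] the stripped
/// GEP index has the SCEV {0,+,%stride}, so the loop-invariant symbolic step
/// %stride is returned; when no symbolic stride is found, nullptr is returned.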
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;

  // Try to remove a gep instruction to make the pointer (actually the index
  // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
  // analyzing the pointer; otherwise, we are analyzing the index.
  Value *OrigPtr = Ptr;

  // The size of the pointer access.
  int64_t PtrAccessSize = 1;

  Ptr = stripGetElementPtr(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);

  if (Ptr != OrigPtr)
    // Strip off casts.
    while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
      V = C->getOperand();

  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
  if (!S)
    return nullptr;

  V = S->getStepRecurrence(*SE);
  if (!V)
    return nullptr;

  // Strip off the size of access multiplication if we are still analyzing the
  // pointer.
  if (OrigPtr == Ptr) {
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();

      // Huge step value - give up.
      if (APStepVal.getBitWidth() > 64)
        return nullptr;

      int64_t StepVal = APStepVal.getSExtValue();
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }

  // Strip off casts.
  Type *StripedOffRecurrenceCast = nullptr;
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
    StripedOffRecurrenceCast = C->getType();
    V = C->getOperand();
  }

  // Look for the loop invariant symbolic value.
  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
  if (!U)
    return nullptr;

  Value *Stride = U->getValue();
  if (!Lp->isLoopInvariant(Stride))
    return nullptr;

  // If we have stripped off the recurrence cast we have to make sure that we
  // return the value that is used in this loop so that we can replace it later.
  if (StripedOffRecurrenceCast)
    Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);

  return Stride;
}

/// \brief Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
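///
/// For example (illustrative), given
///   %v1 = insertelement <4 x float> %v0, float %x, i32 2
/// findScalarElement(%v1, 2) returns %x, while findScalarElement(%v1, 0)
/// recurses into %v0.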
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  unsigned Width = VTy->getNumElements();
  if (EltNo >= Width)  // Out of range access.
    return UndefValue::get(VTy->getElementType());

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant zero.
  Value *Val = nullptr; Constant *Con = nullptr;
  if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
    if (Constant *Elt = Con->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // Otherwise, we don't know.
  return nullptr;
}

/// \brief Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcasts a single value into a vector.
///
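/// For example (illustrative), the canonical splat idiom
///   %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
///                          <4 x i32> zeroinitializer
/// returns %x.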
const llvm::Value *llvm::getSplatValue(const Value *V) {

  if (auto *C = dyn_cast<Constant>(V))
    if (isa<VectorType>(V->getType()))
      return C->getSplatValue();

  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
  if (!ShuffleInst)
    return nullptr;
  // All-zero (or undef) shuffle mask elements.
  for (int MaskElt : ShuffleInst->getShuffleMask())
    if (MaskElt != 0 && MaskElt != -1)
      return nullptr;
  // The first shuffle source is 'insertelement' with index 0.
  auto *InsertEltInst =
      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
    return nullptr;

  return InsertEltInst->getOperand(1);
}

MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {

  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
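  // For example (illustrative): if an i32 chain feeding a trunc to i8 only
  // demands its low 8 bits, every value in that connected component can be
  // assigned a minimum bitwidth of 8 (rounded up to a power of two).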
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  assert(Blocks.size() > 0 && "Must have at least one block!");
  const DataLayout &DL = Blocks[0]->getModule()->getDataLayout();

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the loaded type is legal,
        // don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (Visited.count(Val))
      continue;
    Visited.insert(Val);

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    APInt NeededBits = DB.getDemandedBits(I);
    unsigned BW = NeededBits.getBitWidth();
    if (BW > 64)
      return MapVector<Instruction *, uint64_t>();

    auto NSB = ComputeNumSignBits(I, DL);

    // Query demanded bits for the bits required by the instruction. Remove
    // any bits that are equal to the sign bit, because we can truncate the
    // instruction without changing their value.
    NeededBits &= APInt::getLowBitsSet(BW, BW - NSB);
    DBits[Leader] |= NeededBits.getZExtValue();
    DBits[I] |= NeededBits.getZExtValue();

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      LeaderDemandedBits |= DBits[*MI];

    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                     llvm::countLeadingZeros(LeaderDemandedBits);
    // Round up to a power of 2
    if (!isPowerOf2_64((uint64_t)MinBW))
      MinBW = NextPowerOf2(MinBW);

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence class.
    bool Abort = false;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
      if (!isa<Instruction>(*MI))
        continue;
      Type *Ty = (*MI)->getType();
      if (Roots.count(*MI))
        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(*MI)] = MinBW;
    }
  }

  return MinBWs;
}