//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// \brief The maximum number of comparisons allowed while merging memory
/// checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// \brief We collect interesting dependences up to this threshold.
static cl::opt<unsigned> MaxInterestingDependence(
    "max-interesting-dependences", cl::Hidden,
    cl::desc("Maximum number of interesting dependences collected by "
             "loop-access analysis (default = 100)"),
    cl::init(100));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
                                    const Function *TheFunction,
                                    const Loop *TheLoop,
                                    const char *PassName) {
  DebugLoc DL = TheLoop->getStartLoc();
  if (const Instruction *I = Message.getInstr())
    DL = I->getDebugLoc();
  emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
                                 *TheFunction, DL, Message.str());
}

Value *llvm::stripIntegerCast(Value *V) {
  if (CastInst *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {

  const SCEV *OrigSCEV = SE->getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI != PtrToStride.end()) {
    Value *StrideVal = SI->second;

    // Strip casts.
    StrideVal = stripIntegerCast(StrideVal);

    // Replace symbolic stride by one.
    Value *One = ConstantInt::get(StrideVal->getType(), 1);
    ValueToValueMap RewriteMap;
    RewriteMap[StrideVal] = One;

    const SCEV *ByOne =
        SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
                 << "\n");
    return ByOne;
  }

  // Otherwise, just return the SCEV of the original pointer.
  return SE->getSCEV(Ptr);
}
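
// Illustrative example (added for exposition; the names and SCEV spellings
// are hypothetical): for an access A[i * Stride] with 4-byte elements, the
// pointer's SCEV is something like {%A,+,(4 * %Stride)}. If the map has an
// entry for %Stride, the rewrite above substitutes the constant 1 for it,
// yielding {%A,+,4}, i.e. a unit-stride recurrence that the dependence and
// bounds computations can reason about directly.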

void LoopAccessInfo::RuntimePointerCheck::insert(
    Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId,
    const ValueToValueMap &Strides) {
  // Get the stride-replaced SCEV.
  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
  assert(AR && "Invalid addrec expression");
  const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
  const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
  Pointers.push_back(Ptr);
  Starts.push_back(AR->getStart());
  Ends.push_back(ScEnd);
  IsWritePtr.push_back(WritePtr);
  DependencySetId.push_back(DepSetId);
  AliasSetId.push_back(ASId);
  Exprs.push_back(Sc);
}

bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
    const SmallVectorImpl<int> *PtrPartition) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}
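
// For example, with I = %a and J = (%a + 4), Diff folds to the constant 4,
// which is non-negative, so I is returned as the minimum. If the difference
// does not fold to a constant (say %a vs. an unrelated %b), we return
// nullptr and the caller gives up on comparing the two expressions.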

bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer(
    unsigned Index) {
  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(RtCheck.Starts[Index], Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(RtCheck.Ends[Index], High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == RtCheck.Starts[Index])
    Low = RtCheck.Starts[Index];

  // Update the high bound expression if we've found a new max value.
  if (Min1 != RtCheck.Ends[Index])
    High = RtCheck.Ends[Index];

  Members.push_back(Index);
  return true;
}
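
// Worked example (illustrative values): if the group currently spans
// [%a, %a+16) and the pointer at Index spans [%a+8, %a+24), both
// getMinFromExprs calls fold to constant differences, so the pointer is
// accepted and the group bounds widen to [%a, %a+24). A pointer based on an
// unrelated object would make one of the calls return nullptr and be
// rejected.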

void LoopAccessInfo::RuntimePointerCheck::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands,
    bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.
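  //
  // E.g., with pointer bounds P1:[%a, %a+40), P2:[%a+8, %a+48) and
  // P3:[%b, %b+40) in one equivalence class (illustrative values), P1 and
  // P2 are a constant distance apart and merge into one group with bounds
  // [%a, %a+48), while P3, at a non-constant distance, gets a group of its
  // own; a single check of P3's group against the merged group then covers
  // both P1 and P2.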

  CheckingGroups.clear();

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(CheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Pointer = 0; Pointer < Pointers.size(); ++Pointer)
    PositionMap[Pointers[Pointer]] = Pointer;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I], IsWritePtr[I]);

    SmallVector<CheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      unsigned Pointer = PositionMap[MI->getPointer()];
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (CheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable. If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(CheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
  }
}

bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
    unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
  // No need to check if two readonly pointers intersect.
  if (!IsWritePtr[I] && !IsWritePtr[J])
    return false;

  // Only need to check pointers between two different dependency sets.
  if (DependencySetId[I] == DependencySetId[J])
    return false;

  // Only need to check pointers in the same alias set.
  if (AliasSetId[I] != AliasSetId[J])
    return false;

  // If PtrPartition is set omit checks between pointers of the same partition.
  // Partition number -1 means that the pointer is used in multiple partitions.
  // In this case we can't omit the check.
  if (PtrPartition && (*PtrPartition)[I] != -1 &&
      (*PtrPartition)[I] == (*PtrPartition)[J])
    return false;

  return true;
}

void LoopAccessInfo::RuntimePointerCheck::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<int> *PtrPartition) const {

  OS.indent(Depth) << "Run-time memory checks:\n";

  unsigned N = 0;
  for (unsigned I = 0; I < CheckingGroups.size(); ++I)
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
        OS.indent(Depth) << "Check " << N++ << ":\n";
        OS.indent(Depth + 2) << "Comparing group " << I << ":\n";

        for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
          OS.indent(Depth + 2) << *Pointers[CheckingGroups[I].Members[K]]
                               << "\n";
          if (PtrPartition)
            OS << " (Partition: "
               << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
               << "\n";
        }

        OS.indent(Depth + 2) << "Against group " << J << ":\n";

        for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
          OS.indent(Depth + 2) << *Pointers[CheckingGroups[J].Members[K]]
                               << "\n";
          if (PtrPartition)
            OS << " (Partition: "
               << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
               << "\n";
        }
      }

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    OS.indent(Depth + 2) << "Group " << I << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
                         << " High: " << *CheckingGroups[I].High << ")\n";
    for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Exprs[CheckingGroups[I].Members[J]]
                           << "\n";
    }
  }
}

unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
    const SmallVectorImpl<int> *PtrPartition) const {

  unsigned NumPartitions = CheckingGroups.size();
  unsigned CheckCount = 0;

  for (unsigned I = 0; I < NumPartitions; ++I)
    for (unsigned J = I + 1; J < NumPartitions; ++J)
      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
        CheckCount++;
  return CheckCount;
}

bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
    const SmallVectorImpl<int> *PtrPartition) const {
  unsigned NumPointers = Pointers.size();

  for (unsigned I = 0; I < NumPointers; ++I)
    for (unsigned J = I + 1; J < NumPointers; ++J)
      if (needsChecking(I, J, PtrPartition))
        return true;
  return false;
}

namespace {
/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// \brief Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;

  AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA)
      : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false) {}

  /// \brief Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// \brief Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// \brief Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
                       ScalarEvolution *SE, Loop *TheLoop,
                       const ValueToValueMap &Strides,
                       bool ShouldCheckStride = false);

  /// \brief Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// \brief Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearInterestingDependences();
  }

  MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// \brief Go over all memory accesses and check whether runtime pointer
  /// checks are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  const DataLayout &DL;

  /// Set of accesses that need a further dependence check.
  MemAccessInfoSet CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  // intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// \brief Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
  /// while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;
};

} // end anonymous namespace

/// \brief Check whether a pointer can participate in a runtime bounds check.
static bool hasComputableBounds(ScalarEvolution *SE,
                                const ValueToValueMap &Strides, Value *Ptr) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (!AR)
    return false;

  return AR->isAffine();
}
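
// For instance, accesses like A[i] or A[2*i + 7] produce affine AddRecs and
// therefore have computable bounds, whereas A[i*i] produces a non-affine
// (quadratic) recurrence and A[B[i]] produces no AddRec at all; both of the
// latter are rejected here.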

bool AccessAnalysis::canCheckPtrAtRT(
    LoopAccessInfo::RuntimePointerCheck &RtCheck, ScalarEvolution *SE,
    Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckStride) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool NeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 1;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    for (auto A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
      MemAccessInfo Access(Ptr, IsWrite);

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;

      if (hasComputableBounds(SE, StridesMap, Ptr) &&
          // When we run after a failing dependency check we have to make sure
          // we don't have wrapping pointers.
          (!ShouldCheckStride ||
           isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
        // The id of the dependence set.
        unsigned DepId;

        if (IsDepCheckNeeded) {
          Value *Leader = DepCands.getLeaderValue(Access).getPointer();
          unsigned &LeaderId = DepSetId[Leader];
          if (!LeaderId)
            LeaderId = RunningDepId++;
          DepId = LeaderId;
        } else
          // Each access has its own dependence set.
          DepId = RunningDepId++;

        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);

        DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
      } else {
        DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
        CanDoRT = false;
      }
    }

    // If we have at least two writes or one write and a read then we need to
    // check them. But there is no need for checks if there is only one
    // dependence set for this alias set.
    //
    // Note that this function computes CanDoRT and NeedRTCheck independently.
    // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
    // for which we couldn't find the bounds but we don't actually need to emit
    // any checks so it does not matter.
    if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
      NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
                                                 NumWritePtrChecks >= 1));

    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
        continue;

      Value *PtrI = RtCheck.Pointers[i];
      Value *PtrJ = RtCheck.Pointers[j];

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
                        " different address spaces\n");
        return false;
      }
    }
  }

  if (NeedRTCheck && CanDoRT)
    RtCheck.groupChecks(DepCands, IsDepCheckNeeded);

  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
               << " pointer comparisons.\n");

  RtCheck.Need = NeedRTCheck;

  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  DEBUG(dbgs() << "  AST: "; AST.dump());
  DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                 "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.insert(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<Value *, 16> ValueVector;
          ValueVector TempObjects;

          GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
          DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
          for (Value *UnderlyingObj : TempObjects) {
            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           ScalarEvolution *SE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
    if (!isa<ConstantInt>(*Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = *Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = SE->getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// \brief Check whether the access through \p Ptr has a constant stride.
int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
                       const ValueToValueMap &StridesMap) {
  const Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  const PointerType *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
          << *Ptr << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (!AR) {
    DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
          << *Ptr << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
          *Ptr << " SCEV: " << *PtrScev << "\n");
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
    DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
          << *Ptr << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
          " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getValue()->getValue();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
      Stride != 1 && Stride != -1)
    return 0;

  return Stride;
}
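
// Worked example (illustrative): for "int A[n]" accessed as A[3*i], the
// step recurrence is the constant 12 and the element size is 4, so the
// computed stride is 12 / 4 = 3 with no remainder. For A[n*i] with a
// loop-invariant n, the step is not a SCEVConstant (absent a symbolic-stride
// entry for n) and the function returns 0, i.e. unknown stride.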

bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return true;

  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
    return false;

  case BackwardVectorizable:
  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
    return false;

  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
                                                    unsigned TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.
  const unsigned NumCyclesForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  unsigned MaxVFWithoutSLForwardIssues =
      VectorizerParams::MaxVectorWidth * TypeByteSize;
  if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
    MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;

  for (unsigned vf = 2 * TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
       vf *= 2) {
    if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (vf >>= 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    DEBUG(dbgs() << "LAA: Distance " << Distance <<
          " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}
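
// Numeric example (illustrative): with TypeByteSize = 4 and Distance = 12,
// the first candidate vf = 8 already fails (12 % 8 != 0 and 12 / 8 is below
// the 32-cycle forwarding window), capping MaxVFWithoutSLForwardIssues at 4
// bytes. That is below 2 * TypeByteSize, so the distance is reported as a
// store-load forwarding conflict.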

/// \brief Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
                                          unsigned TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not multiple of type byte size.
  if (Distance % TypeByteSize)
    return false;

  unsigned ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
  const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);

  int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
  int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);

  const SCEV *Src = AScev;
  const SCEV *Sink = BScev;

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    //Src = BScev;
    //Sink = AScev;
    std::swap(APtr, BPtr);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = SE->getMinusSCEV(Sink, Src);

  DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
               << "(Induction step: " << StrideAPtr << ")\n");
  DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
               << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    ShouldRetryWithRuntimeCheck = true;
    return Dependence::Unknown;
  }

  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();
  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  unsigned TypeByteSize = DL.getTypeAllocSize(ATy);

  // Negative distances are not plausible dependencies.
  const APInt &Val = C->getValue()->getValue();
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    if (IsTrueDataDependence &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
         ATy != BTy))
      return Dependence::ForwardButPreventsForwarding;

    DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  // Could be improved to assert type sizes are the same (i32 == float, etc).
  if (Val == 0) {
    if (ATy == BTy)
      return Dependence::NoDep;
    DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (ATy != BTy) {
    DEBUG(dbgs() <<
          "LAA: ReadWrite-Write positive dependency with different types\n");
    return Dependence::Unknown;
  }

  unsigned Distance = (unsigned) Val.getZExtValue();

  unsigned Stride = std::abs(StrideAPtr);
  if (Stride > 1 &&
      areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) {
    DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
    return Dependence::NoDep;
  }

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (no need to add the last gap distance).
  //
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      foo(int *A) {
  //        int *B = (int *)((char *)A + 14);
  //        for (i = 0 ; i < 1024 ; i += 2)
  //          B[i] = A[i] + 1;
  //      }
  //
  // Two accesses in memory (stride is 2):
  //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
  //                              | B[0] |      | B[2] |      | B[4] |
  //
  // Distance needs for vectorizing iterations except the last iteration:
  // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
  // 12, which is less than distance.
  //
  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than distance. It is
  // not safe to do vectorization.
  unsigned MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  if (MinDistanceNeeded > Distance) {
    DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
                 << '\n');
    return Dependence::Backward;
  }

  // Unsafe if the minimum distance needed is greater than max safe distance.
  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
    DEBUG(dbgs() << "LAA: Failure because it needs at least "
                 << MinDistanceNeeded << " size in bytes");
    return Dependence::Backward;
  }

  // Positive distance bigger than max vectorization factor.
  // FIXME: Should use max factor instead of max distance in bytes, which could
  // not handle different types.
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      void foo (int *A, char *B) {
  //        for (unsigned i = 0; i < 1024; i++) {
  //          A[i+2] = A[i] + 1;
  //          B[i+2] = B[i] + 1;
  //        }
  //      }
  //
  // This case is currently unsafe according to the max safe distance. If we
  // analyze the two accesses on array B, the max safe dependence distance
  // is 2. Then we analyze the accesses on array A, the minimum distance needed
  // is 8, which is greater than 2, so vectorization is forbidden. But actually
  // both A and B could be vectorized by 2 iterations.
  MaxSafeDepDistBytes =
      Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence &&
      couldPreventStoreLoadForward(Distance, TypeByteSize))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
               << " with max VF = "
               << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');

  return Dependence::BackwardVectorizable;
}

bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                   MemAccessInfoSet &CheckDeps,
                                   const ValueToValueMap &Strides) {

  MaxSafeDepDistBytes = -1U;
  while (!CheckDeps.empty()) {
    MemAccessInfo CurAccess = *CheckDeps.begin();

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
        AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
    AI = AccessSets.member_begin(I), AE = AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      CheckDeps.erase(*AI);
      EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
          for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
               I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
            auto A = std::make_pair(&*AI, *I1);
            auto B = std::make_pair(&*OI, *I2);

            assert(*I1 != *I2);
            if (*I1 > *I2)
              std::swap(A, B);

            Dependence::DepType Type =
                isDependent(*A.first, A.second, *B.first, B.second, Strides);
            SafeForVectorization &= Dependence::isSafeForVectorization(Type);

            // Gather dependences unless we accumulated MaxInterestingDependence
            // dependences. In that case return as soon as we find the first
            // unsafe dependence. This puts a limit on this quadratic
            // algorithm.
            if (RecordInterestingDependences) {
              if (Dependence::isInterestingDependence(Type))
                InterestingDependences.push_back(
                    Dependence(A.second, B.second, Type));

              if (InterestingDependences.size() >= MaxInterestingDependence) {
                RecordInterestingDependences = false;
                InterestingDependences.clear();
                DEBUG(dbgs() << "Too many dependences, stopped recording\n");
              }
            }
            if (!RecordInterestingDependences && !SafeForVectorization)
              return false;
          }
        ++OI;
      }
      AI++;
    }
  }

  DEBUG(dbgs() << "Total Interesting Dependences: "
               << InterestingDependences.size() << "\n");
  return SafeForVectorization;
}

SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
  MemAccessInfo Access(Ptr, isWrite);
  auto &IndexVector = Accesses.find(Access)->second;

  SmallVector<Instruction *, 4> Insts;
  std::transform(IndexVector.begin(), IndexVector.end(),
                 std::back_inserter(Insts),
                 [&](unsigned Idx) { return this->InstMap[Idx]; });
  return Insts;
}
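
// Usage sketch (illustrative): map a dependence's instruction indices back to
// IR, e.g. to collect every store to Ptr recorded for this loop:
//   SmallVector<Instruction *, 4> Stores =
//       DepChecker.getInstructionsForAccess(Ptr, /*isWrite=*/true);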

const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};

void MemoryDepChecker::Dependence::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<Instruction *> &Instrs) const {
  OS.indent(Depth) << DepName[Type] << ":\n";
  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}
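
// Printed form (illustrative operand spellings), e.g. for a backward
// dependence between a load and a later store:
//   Backward:
//       %v = load ... ->
//       store ...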

bool LoopAccessInfo::canAnalyzeLoop() {
  // We need to have a loop header.
  DEBUG(dbgs() << "LAA: Found a loop: " <<
        TheLoop->getHeader()->getName() << '\n');

  // We can only analyze innermost loops.
  if (!TheLoop->empty()) {
    DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
    emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
    return false;
  }

  // We must have a single backedge.
  if (TheLoop->getNumBackEdges() != 1) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // We must have a single exiting block.
  if (!TheLoop->getExitingBlock()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // We only handle bottom-tested loops, i.e. loops in which the condition is
  // checked at the end of each iteration. With that we can assume that all
  // instructions in the loop are executed the same number of times.
  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // ScalarEvolution needs to be able to find the exit count.
  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
  if (ExitCount == SE->getCouldNotCompute()) {
    emitAnalysis(LoopAccessReport() <<
                 "could not determine number of loop iterations");
    DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
    return false;
  }

  return true;
}
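
// An example of a loop that passes all of the checks above (illustrative):
//   for (int i = 0; i < n; ++i)   // innermost, single backedge,
//     A[i] = B[i] + 1;            // bottom-tested, computable trip count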

void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {

  typedef SmallVector<Value*, 16> ValueVector;
  typedef SmallPtrSet<Value*, 16> ValueSet;

  // Holds the Load and Store *instructions*.
  ValueVector Loads;
  ValueVector Stores;

  // Holds all the different accesses in the loop.
  unsigned NumReads = 0;
  unsigned NumReadWrites = 0;

  PtrRtCheck.Pointers.clear();
  PtrRtCheck.Need = false;

  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

  // For each block.
  for (Loop::block_iterator bb = TheLoop->block_begin(),
       be = TheLoop->block_end(); bb != be; ++bb) {

    // Scan the BB and collect legal loads and stores.
    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
         ++it) {

      // If this is a load, save it. If this instruction can read from memory
      // but is not a load, then we quit. Notice that we don't handle function
      // calls that read or write.
      if (it->mayReadFromMemory()) {
        // Many math library functions read the rounding mode. We will only
        // vectorize a loop if it contains known function calls that don't set
        // the flag. Therefore, it is safe to ignore this read from memory.
        CallInst *Call = dyn_cast<CallInst>(it);
        if (Call && getIntrinsicIDForCall(Call, TLI))
          continue;

        // If the function has an explicit vectorized counterpart, we can
        // safely assume that it can be vectorized.
        if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
            TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
          continue;

        LoadInst *Ld = dyn_cast<LoadInst>(it);
        if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
          emitAnalysis(LoopAccessReport(Ld)
                       << "read with atomic ordering or volatile read");
          DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
          CanVecMem = false;
          return;
        }
        NumLoads++;
        Loads.push_back(Ld);
        DepChecker.addAccess(Ld);
        continue;
      }

      // Save 'store' instructions. Abort if other instructions write to
      // memory.
      if (it->mayWriteToMemory()) {
        StoreInst *St = dyn_cast<StoreInst>(it);
        if (!St) {
          emitAnalysis(LoopAccessReport(it) <<
                       "instruction cannot be vectorized");
          CanVecMem = false;
          return;
        }
        if (!St->isSimple() && !IsAnnotatedParallel) {
          emitAnalysis(LoopAccessReport(St)
                       << "write with atomic ordering or volatile write");
          DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
          CanVecMem = false;
          return;
        }
        NumStores++;
        Stores.push_back(St);
        DepChecker.addAccess(St);
      }
    } // Next instr.
  } // Next block.

  // Now we have two lists that hold the loads and the stores.
  // Next, we find the pointers that they use.

  // Check if we see any stores. If there are no stores, then we don't
  // care if the pointers are *restrict*.
  if (!Stores.size()) {
    DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
    CanVecMem = true;
    return;
  }

  MemoryDepChecker::DepCandidates DependentAccesses;
  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
                          AA, LI, DependentAccesses);

  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
  // multiple times on the same object. If the ptr is accessed twice, once
  // for read and once for write, it will only appear once (on the write
  // list). This is okay, since we are going to check for conflicts between
  // writes and between reads and writes, but not between reads and reads.
  ValueSet Seen;

  ValueVector::iterator I, IE;
  for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
    StoreInst *ST = cast<StoreInst>(*I);
    Value* Ptr = ST->getPointerOperand();
    // Check for store to loop invariant address.
    StoreToLoopInvariantAddress |= isUniform(Ptr);
    // If we did *not* see this pointer before, insert it into the read-write
    // list. At this phase it is only a 'write' list.
    if (Seen.insert(Ptr).second) {
      ++NumReadWrites;

      MemoryLocation Loc = MemoryLocation::get(ST);
      // The TBAA metadata could have a control dependency on the predication
      // condition, so we cannot rely on it when determining whether or not we
      // need runtime pointer checks.
      if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
        Loc.AATags.TBAA = nullptr;

      Accesses.addStore(Loc);
    }
  }
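
  // E.g. (illustrative) for a loop body containing A[i] += x, the address of
  // A[i] is in both Loads and Stores, but only the store inserts it into
  // Seen, so it appears on the read-write list exactly once.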

  if (IsAnnotatedParallel) {
    DEBUG(dbgs()
          << "LAA: A loop annotated parallel, ignore memory dependency "
          << "checks.\n");
    CanVecMem = true;
    return;
  }

  for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
    LoadInst *LD = cast<LoadInst>(*I);
    Value* Ptr = LD->getPointerOperand();
    // If we did *not* see this pointer before, insert it into the
    // read list. If we *did* see it before, then it is already in
    // the read-write list. This allows us to vectorize expressions
    // such as A[i] += x, because the address of A[i] is a read-write
    // pointer. This only works if the index of A[i] is consecutive.
    // If the address of i is unknown (for example A[B[i]]) then we may
    // read a few words, modify, and write a few words, and some of the
    // words may be written to the same address.
    bool IsReadOnlyPtr = false;
    if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
      ++NumReads;
      IsReadOnlyPtr = true;
    }

    MemoryLocation Loc = MemoryLocation::get(LD);
    // The TBAA metadata could have a control dependency on the predication
    // condition, so we cannot rely on it when determining whether or not we
    // need runtime pointer checks.
    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
      Loc.AATags.TBAA = nullptr;

    Accesses.addLoad(Loc, IsReadOnlyPtr);
  }

  // If we write (or read-write) to a single destination and there are no
  // other reads in this loop then it is safe to vectorize.
  if (NumReadWrites == 1 && NumReads == 0) {
    DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
    CanVecMem = true;
    return;
  }

  // Build dependence sets and check whether we need a runtime pointer bounds
  // check.
  Accesses.buildDependenceSets();

  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRTIfNeeded =
      Accesses.canCheckPtrAtRT(PtrRtCheck, SE, TheLoop, Strides);
  if (!CanDoRTIfNeeded) {
    emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
    DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
                 << "the array bounds.\n");
    CanVecMem = false;
    return;
  }

  DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");

  CanVecMem = true;
  if (Accesses.isDependencyCheckNeeded()) {
    DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
    CanVecMem = DepChecker.areDepsSafe(
        DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
    MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();

    if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
      DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

      // Clear the dependency checks. We assume they are not needed.
      Accesses.resetDepChecks(DepChecker);

      PtrRtCheck.reset();
      PtrRtCheck.Need = true;

      CanDoRTIfNeeded =
          Accesses.canCheckPtrAtRT(PtrRtCheck, SE, TheLoop, Strides, true);

      // Check that we found the bounds for the pointer.
      if (!CanDoRTIfNeeded) {
        emitAnalysis(LoopAccessReport()
                     << "cannot check memory dependencies at runtime");
        DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
        CanVecMem = false;
        return;
      }

      CanVecMem = true;
    }
  }

  if (CanVecMem)
    DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
                 << (PtrRtCheck.Need ? "" : " don't")
                 << " need runtime memory checks.\n");
  else {
    emitAnalysis(LoopAccessReport() <<
                 "unsafe dependent memory operations in loop");
    DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
  }
}

bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                           DominatorTree *DT) {
  assert(TheLoop->contains(BB) && "Unknown block used");

  // Blocks that do not dominate the latch need predication.
  BasicBlock* Latch = TheLoop->getLoopLatch();
  return !DT->dominates(BB, Latch);
}
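
// E.g. (illustrative) in
//   for (unsigned i = 0; i < n; ++i)
//     if (C[i])
//       A[i] = 0;
// the block holding the conditional store does not dominate the latch and
// needs predication; the loop header, which dominates the latch, does not.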

void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
  assert(!Report && "Multiple reports generated");
  Report = Message;
}

bool LoopAccessInfo::isUniform(Value *V) const {
  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}

// FIXME: this function is currently a duplicate of the one in
// LoopVectorize.cpp.
static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
                                 Instruction *Loc) {
  if (FirstInst)
    return FirstInst;
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() == Loc->getParent() ? I : nullptr;
  return nullptr;
}

std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
    Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
  if (!PtrRtCheck.Need)
    return std::make_pair(nullptr, nullptr);

  SmallVector<TrackingVH<Value>, 2> Starts;
  SmallVector<TrackingVH<Value>, 2> Ends;

  LLVMContext &Ctx = Loc->getContext();
  SCEVExpander Exp(*SE, DL, "induction");
  Instruction *FirstInst = nullptr;

  for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
    const RuntimePointerCheck::CheckingPtrGroup &CG =
        PtrRtCheck.CheckingGroups[i];
    Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]];
    const SCEV *Sc = SE->getSCEV(Ptr);

    if (SE->isLoopInvariant(Sc, TheLoop)) {
      DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
                   << "\n");
      Starts.push_back(Ptr);
      Ends.push_back(Ptr);
    } else {
      unsigned AS = Ptr->getType()->getPointerAddressSpace();

      // Use this type for pointer arithmetic.
      Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
      Value *Start = nullptr, *End = nullptr;

      DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
      Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
      End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
      DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
      Starts.push_back(Start);
      Ends.push_back(End);
    }
  }

  IRBuilder<> ChkBuilder(Loc);
  // Our instructions might fold to a constant.
  Value *MemoryRuntimeCheck = nullptr;
  for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
    for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) {
      const RuntimePointerCheck::CheckingPtrGroup &CGI =
          PtrRtCheck.CheckingGroups[i];
      const RuntimePointerCheck::CheckingPtrGroup &CGJ =
          PtrRtCheck.CheckingGroups[j];

      if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition))
        continue;

      unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
      unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();

      assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
             (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
             "Trying to bounds check pointers with different address spaces");

      Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
      Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);

      Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
      Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
      Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
      Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");

      Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
      FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
      Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
      FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
      Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
      FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
      if (MemoryRuntimeCheck) {
        IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
                                         "conflict.rdx");
        FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
      }
      MemoryRuntimeCheck = IsConflict;
    }
  }

  if (!MemoryRuntimeCheck)
    return std::make_pair(nullptr, nullptr);

  // We have to do this trickery because the IRBuilder might fold the check to
  // a constant expression in which case there is no Instruction anchored in
  // the block.
  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
                                                 ConstantInt::getTrue(Ctx));
  ChkBuilder.Insert(Check, "memcheck.conflict");
  FirstInst = getFirstInst(FirstInst, Check, Loc);
  return std::make_pair(FirstInst, Check);
}
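
// Shape of the emitted IR for one pair of checking groups (illustrative value
// names; the actual names receive unique suffixes):
//   %bound0 = icmp ule i8* %start0, %end1
//   %bound1 = icmp ule i8* %start1, %end0
//   %found.conflict = and i1 %bound0, %bound1
//   %memcheck.conflict = and i1 %found.conflict, true
// With more than one pair, the per-pair %found.conflict values are first
// OR-ed together into %conflict.rdx.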

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const DataLayout &DL,
                               const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                               DominatorTree *DT, LoopInfo *LI,
                               const ValueToValueMap &Strides)
    : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI),
      AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
      MaxSafeDepDistBytes(-1U), CanVecMem(false),
      StoreToLoopInvariantAddress(false) {
  if (canAnalyzeLoop())
    analyzeLoop(Strides);
}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    if (PtrRtCheck.Need)
      OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
    else
      OS.indent(Depth) << "Memory dependences are safe\n";
  }

  if (Report)
    OS.indent(Depth) << "Report: " << Report->str() << "\n";

  if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
    OS.indent(Depth) << "Interesting Dependences:\n";
    for (auto &Dep : *InterestingDependences) {
      Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
      OS << "\n";
    }
  } else
    OS.indent(Depth) << "Too many interesting dependences, not recorded\n";

  // List the pairs of accesses that need run-time checks to prove
  // independence.
  PtrRtCheck.print(OS, Depth);
  OS << "\n";

  OS.indent(Depth) << "Store to invariant address was "
                   << (StoreToLoopInvariantAddress ? "" : "not ")
                   << "found in loop.\n";
}
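
// Sample print output for a loop (illustrative; the run-time check section
// comes from PtrRtCheck.print):
//   Memory dependences are safe with run-time checks
//   Interesting Dependences:
//     Forward:
//         %v = load ... ->
//         store ...
//   ...
//   Store to invariant address was not found in loop.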

const LoopAccessInfo &
LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
  auto &LAI = LoopAccessInfoMap[L];

#ifndef NDEBUG
  assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
         "Symbolic strides changed for loop");
#endif

  if (!LAI) {
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
                                            Strides);
#ifndef NDEBUG
    LAI->NumSymbolicStrides = Strides.size();
#endif
  }
  return *LAI.get();
}
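
// Usage sketch for a client pass (illustrative; assumes the pass declared a
// dependency on LoopAccessAnalysis in its getAnalysisUsage):
//   auto &LAA = getAnalysis<LoopAccessAnalysis>();
//   const LoopAccessInfo &LAI = LAA.getInfo(L, Strides);
//   if (LAI.canVectorizeMemory()) { /* safe to vectorize L's memory ops */ }
// The result is cached per loop, so repeated queries for the same loop and
// strides are cheap.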

void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
  LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);

  ValueToValueMap NoSymbolicStrides;

  for (Loop *TopLevelLoop : *LI)
    for (Loop *L : depth_first(TopLevelLoop)) {
      OS.indent(2) << L->getHeader()->getName() << ":\n";
      auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
      LAI.print(OS, 4);
    }
}

bool LoopAccessAnalysis::runOnFunction(Function &F) {
  SE = &getAnalysis<ScalarEvolution>();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  TLI = TLIP ? &TLIP->getTLI() : nullptr;
  AA = &getAnalysis<AliasAnalysis>();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

  return false;
}

void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<ScalarEvolution>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();

  AU.setPreservesAll();
}

char LoopAccessAnalysis::ID = 0;
static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"

INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)

namespace llvm {
  Pass *createLAAPass() {
    return new LoopAccessAnalysis();
  }
}