blob: 2e0d8e0374c080af62946dae7e605e2b639253b6 [file] [log] [blame]
Karthik Bhat88db86d2015-03-06 10:11:25 +00001//===- LoopInterchange.cpp - Loop interchange pass------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This Pass handles loop interchange transform.
11// This pass interchanges loops to provide a more cache-friendly memory access
12// patterns.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/Analysis/AliasAnalysis.h"
Daniel Jasperaec2fa32016-12-19 08:22:17 +000018#include "llvm/Analysis/AssumptionCache.h"
Karthik Bhat88db86d2015-03-06 10:11:25 +000019#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/CodeMetrics.h"
21#include "llvm/Analysis/DependenceAnalysis.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopIterator.h"
24#include "llvm/Analysis/LoopPass.h"
Florian Hahnad993522017-07-15 13:13:19 +000025#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
Karthik Bhat88db86d2015-03-06 10:11:25 +000026#include "llvm/Analysis/ScalarEvolution.h"
27#include "llvm/Analysis/ScalarEvolutionExpander.h"
28#include "llvm/Analysis/ScalarEvolutionExpressions.h"
29#include "llvm/Analysis/TargetTransformInfo.h"
30#include "llvm/Analysis/ValueTracking.h"
Benjamin Kramer799003b2015-03-23 19:32:43 +000031#include "llvm/IR/Dominators.h"
Karthik Bhat88db86d2015-03-06 10:11:25 +000032#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
Karthik Bhat88db86d2015-03-06 10:11:25 +000034#include "llvm/IR/InstIterator.h"
Benjamin Kramer799003b2015-03-23 19:32:43 +000035#include "llvm/IR/IntrinsicInst.h"
Karthik Bhat8210fdf2015-04-23 04:51:44 +000036#include "llvm/IR/Module.h"
Karthik Bhat88db86d2015-03-06 10:11:25 +000037#include "llvm/Pass.h"
38#include "llvm/Support/Debug.h"
Karthik Bhat88db86d2015-03-06 10:11:25 +000039#include "llvm/Support/raw_ostream.h"
Benjamin Kramer799003b2015-03-23 19:32:43 +000040#include "llvm/Transforms/Scalar.h"
Karthik Bhat88db86d2015-03-06 10:11:25 +000041#include "llvm/Transforms/Utils/BasicBlockUtils.h"
Benjamin Kramer799003b2015-03-23 19:32:43 +000042#include "llvm/Transforms/Utils/LoopUtils.h"
Davide Italiano9d8f6f82017-01-29 01:55:24 +000043
Karthik Bhat88db86d2015-03-06 10:11:25 +000044using namespace llvm;
45
46#define DEBUG_TYPE "loop-interchange"
47
Chad Rosier72431892016-09-14 17:07:13 +000048static cl::opt<int> LoopInterchangeCostThreshold(
49 "loop-interchange-threshold", cl::init(0), cl::Hidden,
50 cl::desc("Interchange if you gain more than this number"));
51
Karthik Bhat88db86d2015-03-06 10:11:25 +000052namespace {
53
54typedef SmallVector<Loop *, 8> LoopVector;
55
56// TODO: Check if we can use a sparse matrix here.
57typedef std::vector<std::vector<char>> CharMatrix;
58
59// Maximum number of dependencies that can be handled in the dependency matrix.
60static const unsigned MaxMemInstrCount = 100;
61
62// Maximum loop depth supported.
63static const unsigned MaxLoopNestDepth = 10;
64
65struct LoopInterchange;
66
67#ifdef DUMP_DEP_MATRICIES
68void printDepMatrix(CharMatrix &DepMatrix) {
69 for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) {
70 std::vector<char> Vec = *I;
71 for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II)
72 DEBUG(dbgs() << *II << " ");
73 DEBUG(dbgs() << "\n");
74 }
75}
76#endif
77
Karthik Bhat8210fdf2015-04-23 04:51:44 +000078static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
Chandler Carruth49c22192016-05-12 22:19:39 +000079 Loop *L, DependenceInfo *DI) {
Karthik Bhat88db86d2015-03-06 10:11:25 +000080 typedef SmallVector<Value *, 16> ValueVector;
81 ValueVector MemInstr;
82
Karthik Bhat88db86d2015-03-06 10:11:25 +000083 // For each block.
84 for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
85 BB != BE; ++BB) {
86 // Scan the BB and collect legal loads and stores.
87 for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
88 ++I) {
Chad Rosier09c11092016-09-13 12:56:04 +000089 if (!isa<Instruction>(I))
Karthik Bhat88db86d2015-03-06 10:11:25 +000090 return false;
Chad Rosier09c11092016-09-13 12:56:04 +000091 if (LoadInst *Ld = dyn_cast<LoadInst>(I)) {
92 if (!Ld->isSimple())
93 return false;
94 MemInstr.push_back(&*I);
95 } else if (StoreInst *St = dyn_cast<StoreInst>(I)) {
96 if (!St->isSimple())
97 return false;
98 MemInstr.push_back(&*I);
99 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000100 }
101 }
102
103 DEBUG(dbgs() << "Found " << MemInstr.size()
104 << " Loads and Stores to analyze\n");
105
106 ValueVector::iterator I, IE, J, JE;
107
108 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
109 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
110 std::vector<char> Dep;
Chad Rosier09c11092016-09-13 12:56:04 +0000111 Instruction *Src = cast<Instruction>(*I);
112 Instruction *Dst = cast<Instruction>(*J);
Chad Rosier90bcb912016-09-07 16:07:17 +0000113 if (Src == Dst)
Karthik Bhat88db86d2015-03-06 10:11:25 +0000114 continue;
Chad Rosier00eb8db2016-09-21 19:16:47 +0000115 // Ignore Input dependencies.
Chad Rosier90bcb912016-09-07 16:07:17 +0000116 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
Karthik Bhat88db86d2015-03-06 10:11:25 +0000117 continue;
Chad Rosier00eb8db2016-09-21 19:16:47 +0000118 // Track Output, Flow, and Anti dependencies.
Chad Rosier90bcb912016-09-07 16:07:17 +0000119 if (auto D = DI->depends(Src, Dst, true)) {
Chad Rosier00eb8db2016-09-21 19:16:47 +0000120 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
121 DEBUG(StringRef DepType =
122 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
123 dbgs() << "Found " << DepType
124 << " dependency between Src and Dst\n"
Chad Rosier90bcb912016-09-07 16:07:17 +0000125 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
Chad Rosier00eb8db2016-09-21 19:16:47 +0000126 unsigned Levels = D->getLevels();
127 char Direction;
128 for (unsigned II = 1; II <= Levels; ++II) {
129 const SCEV *Distance = D->getDistance(II);
130 const SCEVConstant *SCEVConst =
131 dyn_cast_or_null<SCEVConstant>(Distance);
132 if (SCEVConst) {
133 const ConstantInt *CI = SCEVConst->getValue();
134 if (CI->isNegative())
135 Direction = '<';
136 else if (CI->isZero())
137 Direction = '=';
138 else
139 Direction = '>';
140 Dep.push_back(Direction);
141 } else if (D->isScalar(II)) {
142 Direction = 'S';
143 Dep.push_back(Direction);
144 } else {
145 unsigned Dir = D->getDirection(II);
146 if (Dir == Dependence::DVEntry::LT ||
147 Dir == Dependence::DVEntry::LE)
148 Direction = '<';
149 else if (Dir == Dependence::DVEntry::GT ||
150 Dir == Dependence::DVEntry::GE)
151 Direction = '>';
152 else if (Dir == Dependence::DVEntry::EQ)
153 Direction = '=';
154 else
155 Direction = '*';
156 Dep.push_back(Direction);
157 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000158 }
Chad Rosier00eb8db2016-09-21 19:16:47 +0000159 while (Dep.size() != Level) {
160 Dep.push_back('I');
161 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000162
Chad Rosier00eb8db2016-09-21 19:16:47 +0000163 DepMatrix.push_back(Dep);
164 if (DepMatrix.size() > MaxMemInstrCount) {
165 DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
166 << " dependencies inside loop\n");
167 return false;
Karthik Bhat88db86d2015-03-06 10:11:25 +0000168 }
169 }
170 }
171 }
172
Vikram TV74b41112015-12-09 05:16:24 +0000173 // We don't have a DepMatrix to check legality return false.
Karthik Bhat88db86d2015-03-06 10:11:25 +0000174 if (DepMatrix.size() == 0)
175 return false;
176 return true;
177}
178
179// A loop is moved from index 'from' to an index 'to'. Update the Dependence
180// matrix by exchanging the two columns.
Chad Rosierd18ea062016-09-13 13:00:29 +0000181static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
182 unsigned ToIndx) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000183 unsigned numRows = DepMatrix.size();
184 for (unsigned i = 0; i < numRows; ++i) {
185 char TmpVal = DepMatrix[i][ToIndx];
186 DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
187 DepMatrix[i][FromIndx] = TmpVal;
188 }
189}
190
191// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
192// '>'
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000193static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
194 unsigned Column) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000195 for (unsigned i = 0; i <= Column; ++i) {
196 if (DepMatrix[Row][i] == '<')
197 return false;
198 if (DepMatrix[Row][i] == '>')
199 return true;
200 }
201 // All dependencies were '=','S' or 'I'
202 return false;
203}
204
205// Checks if no dependence exist in the dependency matrix in Row before Column.
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000206static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
207 unsigned Column) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000208 for (unsigned i = 0; i < Column; ++i) {
Chandler Carruthfca1ff02016-11-03 16:39:25 +0000209 if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
Karthik Bhat88db86d2015-03-06 10:11:25 +0000210 DepMatrix[Row][i] != 'I')
211 return false;
212 }
213 return true;
214}
215
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000216static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
217 unsigned OuterLoopId, char InnerDep,
218 char OuterDep) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000219
220 if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
221 return false;
222
223 if (InnerDep == OuterDep)
224 return true;
225
226 // It is legal to interchange if and only if after interchange no row has a
227 // '>' direction as the leftmost non-'='.
228
229 if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
230 return true;
231
232 if (InnerDep == '<')
233 return true;
234
235 if (InnerDep == '>') {
236 // If OuterLoopId represents outermost loop then interchanging will make the
237 // 1st dependency as '>'
238 if (OuterLoopId == 0)
239 return false;
240
241 // If all dependencies before OuterloopId are '=','S'or 'I'. Then
242 // interchanging will result in this row having an outermost non '='
243 // dependency of '>'
244 if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
245 return true;
246 }
247
248 return false;
249}
250
251// Checks if it is legal to interchange 2 loops.
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000252// [Theorem] A permutation of the loops in a perfect nest is legal if and only
Chad Rosier61683a22016-09-13 13:08:53 +0000253// if the direction matrix, after the same permutation is applied to its
254// columns, has no ">" direction as the leftmost non-"=" direction in any row.
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000255static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
256 unsigned InnerLoopId,
257 unsigned OuterLoopId) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000258
259 unsigned NumRows = DepMatrix.size();
260 // For each row check if it is valid to interchange.
261 for (unsigned Row = 0; Row < NumRows; ++Row) {
262 char InnerDep = DepMatrix[Row][InnerLoopId];
263 char OuterDep = DepMatrix[Row][OuterLoopId];
264 if (InnerDep == '*' || OuterDep == '*')
265 return false;
Chad Rosier61683a22016-09-13 13:08:53 +0000266 if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
Karthik Bhat88db86d2015-03-06 10:11:25 +0000267 return false;
268 }
269 return true;
270}
271
272static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
273
Chad Rosierf5814f52016-09-07 15:56:59 +0000274 DEBUG(dbgs() << "Calling populateWorklist on Func: "
275 << L.getHeader()->getParent()->getName() << " Loop: %"
276 << L.getHeader()->getName() << '\n');
Karthik Bhat88db86d2015-03-06 10:11:25 +0000277 LoopVector LoopList;
278 Loop *CurrentLoop = &L;
Benjamin Kramere448b5b2015-07-13 17:21:14 +0000279 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
280 while (!Vec->empty()) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000281 // The current loop has multiple subloops in it hence it is not tightly
282 // nested.
283 // Discard all loops above it added into Worklist.
Benjamin Kramere448b5b2015-07-13 17:21:14 +0000284 if (Vec->size() != 1) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000285 LoopList.clear();
286 return;
287 }
288 LoopList.push_back(CurrentLoop);
Benjamin Kramere448b5b2015-07-13 17:21:14 +0000289 CurrentLoop = Vec->front();
290 Vec = &CurrentLoop->getSubLoops();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000291 }
292 LoopList.push_back(CurrentLoop);
Benjamin Kramere448b5b2015-07-13 17:21:14 +0000293 V.push_back(std::move(LoopList));
Karthik Bhat88db86d2015-03-06 10:11:25 +0000294}
295
296static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
297 PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
298 if (InnerIndexVar)
299 return InnerIndexVar;
300 if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
301 return nullptr;
302 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
303 PHINode *PhiVar = cast<PHINode>(I);
304 Type *PhiTy = PhiVar->getType();
305 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
306 !PhiTy->isPointerTy())
307 return nullptr;
308 const SCEVAddRecExpr *AddRec =
309 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
310 if (!AddRec || !AddRec->isAffine())
311 continue;
312 const SCEV *Step = AddRec->getStepRecurrence(*SE);
Chad Rosierf7c76f92016-09-21 13:28:41 +0000313 if (!isa<SCEVConstant>(Step))
Karthik Bhat88db86d2015-03-06 10:11:25 +0000314 continue;
315 // Found the induction variable.
316 // FIXME: Handle loops with more than one induction variable. Note that,
317 // currently, legality makes sure we have only one induction variable.
318 return PhiVar;
319 }
320 return nullptr;
321}
322
323/// LoopInterchangeLegality checks if it is legal to interchange the loop.
324class LoopInterchangeLegality {
325public:
326 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
Florian Hahnad993522017-07-15 13:13:19 +0000327 LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA,
328 OptimizationRemarkEmitter *ORE)
Justin Bogner843fb202015-12-15 19:40:57 +0000329 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
Florian Hahnad993522017-07-15 13:13:19 +0000330 PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {}
Karthik Bhat88db86d2015-03-06 10:11:25 +0000331
332 /// Check if the loops can be interchanged.
333 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
334 CharMatrix &DepMatrix);
335 /// Check if the loop structure is understood. We do not handle triangular
336 /// loops for now.
337 bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
338
339 bool currentLimitations();
340
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000341 bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
342
Karthik Bhat88db86d2015-03-06 10:11:25 +0000343private:
344 bool tightlyNested(Loop *Outer, Loop *Inner);
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000345 bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
346 bool areAllUsesReductions(Instruction *Ins, Loop *L);
347 bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
348 bool findInductionAndReductions(Loop *L,
349 SmallVector<PHINode *, 8> &Inductions,
350 SmallVector<PHINode *, 8> &Reductions);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000351 Loop *OuterLoop;
352 Loop *InnerLoop;
353
Karthik Bhat88db86d2015-03-06 10:11:25 +0000354 ScalarEvolution *SE;
Justin Bogner843fb202015-12-15 19:40:57 +0000355 LoopInfo *LI;
356 DominatorTree *DT;
357 bool PreserveLCSSA;
Florian Hahnad993522017-07-15 13:13:19 +0000358 /// Interface to emit optimization remarks.
359 OptimizationRemarkEmitter *ORE;
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000360
361 bool InnerLoopHasReduction;
Karthik Bhat88db86d2015-03-06 10:11:25 +0000362};
363
364/// LoopInterchangeProfitability checks if it is profitable to interchange the
365/// loop.
366class LoopInterchangeProfitability {
367public:
Florian Hahnad993522017-07-15 13:13:19 +0000368 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
369 OptimizationRemarkEmitter *ORE)
370 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
Karthik Bhat88db86d2015-03-06 10:11:25 +0000371
Vikram TV74b41112015-12-09 05:16:24 +0000372 /// Check if the loop interchange is profitable.
Karthik Bhat88db86d2015-03-06 10:11:25 +0000373 bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
374 CharMatrix &DepMatrix);
375
376private:
377 int getInstrOrderCost();
378
379 Loop *OuterLoop;
380 Loop *InnerLoop;
381
382 /// Scev analysis.
383 ScalarEvolution *SE;
Florian Hahnad993522017-07-15 13:13:19 +0000384 /// Interface to emit optimization remarks.
385 OptimizationRemarkEmitter *ORE;
Karthik Bhat88db86d2015-03-06 10:11:25 +0000386};
387
Vikram TV74b41112015-12-09 05:16:24 +0000388/// LoopInterchangeTransform interchanges the loop.
Karthik Bhat88db86d2015-03-06 10:11:25 +0000389class LoopInterchangeTransform {
390public:
391 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
392 LoopInfo *LI, DominatorTree *DT,
Justin Bogner843fb202015-12-15 19:40:57 +0000393 BasicBlock *LoopNestExit,
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000394 bool InnerLoopContainsReductions)
Karthik Bhat88db86d2015-03-06 10:11:25 +0000395 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000396 LoopExit(LoopNestExit),
397 InnerLoopHasReduction(InnerLoopContainsReductions) {}
Karthik Bhat88db86d2015-03-06 10:11:25 +0000398
399 /// Interchange OuterLoop and InnerLoop.
400 bool transform();
401 void restructureLoops(Loop *InnerLoop, Loop *OuterLoop);
402 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000403
404private:
405 void splitInnerLoopLatch(Instruction *);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000406 void splitInnerLoopHeader();
407 bool adjustLoopLinks();
408 void adjustLoopPreheaders();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000409 bool adjustLoopBranches();
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000410 void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
411 BasicBlock *NewPred);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000412
413 Loop *OuterLoop;
414 Loop *InnerLoop;
415
416 /// Scev analysis.
417 ScalarEvolution *SE;
418 LoopInfo *LI;
419 DominatorTree *DT;
420 BasicBlock *LoopExit;
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000421 bool InnerLoopHasReduction;
Karthik Bhat88db86d2015-03-06 10:11:25 +0000422};
423
Vikram TV74b41112015-12-09 05:16:24 +0000424// Main LoopInterchange Pass.
Karthik Bhat88db86d2015-03-06 10:11:25 +0000425struct LoopInterchange : public FunctionPass {
426 static char ID;
427 ScalarEvolution *SE;
428 LoopInfo *LI;
Chandler Carruth49c22192016-05-12 22:19:39 +0000429 DependenceInfo *DI;
Karthik Bhat88db86d2015-03-06 10:11:25 +0000430 DominatorTree *DT;
Justin Bogner843fb202015-12-15 19:40:57 +0000431 bool PreserveLCSSA;
Florian Hahnad993522017-07-15 13:13:19 +0000432 /// Interface to emit optimization remarks.
433 OptimizationRemarkEmitter *ORE;
434
Karthik Bhat88db86d2015-03-06 10:11:25 +0000435 LoopInterchange()
Chandler Carruth49c22192016-05-12 22:19:39 +0000436 : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000437 initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
438 }
439
440 void getAnalysisUsage(AnalysisUsage &AU) const override {
Chandler Carruth2f1fd162015-08-17 02:08:17 +0000441 AU.addRequired<ScalarEvolutionWrapperPass>();
Chandler Carruth7b560d42015-09-09 17:55:00 +0000442 AU.addRequired<AAResultsWrapperPass>();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000443 AU.addRequired<DominatorTreeWrapperPass>();
444 AU.addRequired<LoopInfoWrapperPass>();
Chandler Carruth49c22192016-05-12 22:19:39 +0000445 AU.addRequired<DependenceAnalysisWrapperPass>();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000446 AU.addRequiredID(LoopSimplifyID);
447 AU.addRequiredID(LCSSAID);
Florian Hahnad993522017-07-15 13:13:19 +0000448 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000449 }
450
451 bool runOnFunction(Function &F) override {
Andrew Kaylor50271f72016-05-03 22:32:30 +0000452 if (skipFunction(F))
453 return false;
454
Chandler Carruth2f1fd162015-08-17 02:08:17 +0000455 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000456 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
Chandler Carruth49c22192016-05-12 22:19:39 +0000457 DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000458 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
459 DT = DTWP ? &DTWP->getDomTree() : nullptr;
Florian Hahnad993522017-07-15 13:13:19 +0000460 ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
Justin Bogner843fb202015-12-15 19:40:57 +0000461 PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
462
Karthik Bhat88db86d2015-03-06 10:11:25 +0000463 // Build up a worklist of loop pairs to analyze.
464 SmallVector<LoopVector, 8> Worklist;
465
466 for (Loop *L : *LI)
467 populateWorklist(*L, Worklist);
468
Chad Rosiera4c42462016-09-12 13:24:47 +0000469 DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000470 bool Changed = true;
471 while (!Worklist.empty()) {
472 LoopVector LoopList = Worklist.pop_back_val();
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000473 Changed = processLoopList(LoopList, F);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000474 }
475 return Changed;
476 }
477
478 bool isComputableLoopNest(LoopVector LoopList) {
Benjamin Kramer135f7352016-06-26 12:28:59 +0000479 for (Loop *L : LoopList) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000480 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
481 if (ExitCountOuter == SE->getCouldNotCompute()) {
Chad Rosierf7c76f92016-09-21 13:28:41 +0000482 DEBUG(dbgs() << "Couldn't compute backedge count\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000483 return false;
484 }
485 if (L->getNumBackEdges() != 1) {
486 DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
487 return false;
488 }
489 if (!L->getExitingBlock()) {
Chad Rosierf7c76f92016-09-21 13:28:41 +0000490 DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000491 return false;
492 }
493 }
494 return true;
495 }
496
Benjamin Kramerc321e532016-06-08 19:09:22 +0000497 unsigned selectLoopForInterchange(const LoopVector &LoopList) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000498 // TODO: Add a better heuristic to select the loop to be interchanged based
Benjamin Kramerdf005cb2015-08-08 18:27:36 +0000499 // on the dependence matrix. Currently we select the innermost loop.
Karthik Bhat88db86d2015-03-06 10:11:25 +0000500 return LoopList.size() - 1;
501 }
502
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000503 bool processLoopList(LoopVector LoopList, Function &F) {
504
Karthik Bhat88db86d2015-03-06 10:11:25 +0000505 bool Changed = false;
Chad Rosier7ea0d392016-09-13 13:30:30 +0000506 unsigned LoopNestDepth = LoopList.size();
507 if (LoopNestDepth < 2) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000508 DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
509 return false;
510 }
Chad Rosier7ea0d392016-09-13 13:30:30 +0000511 if (LoopNestDepth > MaxLoopNestDepth) {
512 DEBUG(dbgs() << "Cannot handle loops of depth greater than "
513 << MaxLoopNestDepth << "\n");
514 return false;
515 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000516 if (!isComputableLoopNest(LoopList)) {
Chad Rosiera4c42462016-09-12 13:24:47 +0000517 DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000518 return false;
519 }
Chad Rosier7ea0d392016-09-13 13:30:30 +0000520
521 DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n");
522
523 CharMatrix DependencyMatrix;
Karthik Bhat88db86d2015-03-06 10:11:25 +0000524 Loop *OuterMostLoop = *(LoopList.begin());
Chad Rosier7ea0d392016-09-13 13:30:30 +0000525 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
Chandler Carruth49c22192016-05-12 22:19:39 +0000526 OuterMostLoop, DI)) {
Chad Rosierf7c76f92016-09-21 13:28:41 +0000527 DEBUG(dbgs() << "Populating dependency matrix failed\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000528 return false;
529 }
530#ifdef DUMP_DEP_MATRICIES
Chad Rosier58ede272016-09-14 16:43:19 +0000531 DEBUG(dbgs() << "Dependence before interchange\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000532 printDepMatrix(DependencyMatrix);
533#endif
534
535 BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch();
536 BranchInst *OuterMostLoopLatchBI =
537 dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator());
538 if (!OuterMostLoopLatchBI)
539 return false;
540
541 // Since we currently do not handle LCSSA PHI's any failure in loop
542 // condition will now branch to LoopNestExit.
543 // TODO: This should be removed once we handle LCSSA PHI nodes.
544
545 // Get the Outermost loop exit.
546 BasicBlock *LoopNestExit;
547 if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader())
548 LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1);
549 else
550 LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0);
551
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000552 if (isa<PHINode>(LoopNestExit->begin())) {
553 DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now "
554 "since on failure all loops branch to loop nest exit.\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000555 return false;
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000556 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000557
558 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
Benjamin Kramerdf005cb2015-08-08 18:27:36 +0000559 // Move the selected loop outwards to the best possible position.
Karthik Bhat88db86d2015-03-06 10:11:25 +0000560 for (unsigned i = SelecLoopId; i > 0; i--) {
561 bool Interchanged =
562 processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
563 if (!Interchanged)
564 return Changed;
565 // Loops interchanged reflect the same in LoopList
Benjamin Kramer79442922015-03-06 18:59:14 +0000566 std::swap(LoopList[i - 1], LoopList[i]);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000567
568 // Update the DependencyMatrix
Chad Rosierd18ea062016-09-13 13:00:29 +0000569 interChangeDependencies(DependencyMatrix, i, i - 1);
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000570 DT->recalculate(F);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000571#ifdef DUMP_DEP_MATRICIES
Chad Rosier58ede272016-09-14 16:43:19 +0000572 DEBUG(dbgs() << "Dependence after interchange\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000573 printDepMatrix(DependencyMatrix);
574#endif
575 Changed |= Interchanged;
576 }
577 return Changed;
578 }
579
580 bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
581 unsigned OuterLoopId, BasicBlock *LoopNestExit,
582 std::vector<std::vector<char>> &DependencyMatrix) {
583
Chad Rosier13bc0d192016-09-07 18:15:12 +0000584 DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
Karthik Bhat88db86d2015-03-06 10:11:25 +0000585 << " and OuterLoopId = " << OuterLoopId << "\n");
586 Loop *InnerLoop = LoopList[InnerLoopId];
587 Loop *OuterLoop = LoopList[OuterLoopId];
588
Justin Bogner843fb202015-12-15 19:40:57 +0000589 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
Florian Hahnad993522017-07-15 13:13:19 +0000590 PreserveLCSSA, ORE);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000591 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
592 DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
593 return false;
594 }
595 DEBUG(dbgs() << "Loops are legal to interchange\n");
Florian Hahnad993522017-07-15 13:13:19 +0000596 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000597 if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
Chad Rosierf7c76f92016-09-21 13:28:41 +0000598 DEBUG(dbgs() << "Interchanging loops not profitable\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000599 return false;
600 }
601
Florian Hahnad993522017-07-15 13:13:19 +0000602 ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged",
603 InnerLoop->getStartLoc(),
604 InnerLoop->getHeader())
605 << "Loop interchanged with enclosing loop.");
606
Justin Bogner843fb202015-12-15 19:40:57 +0000607 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000608 LoopNestExit, LIL.hasInnerLoopReduction());
Karthik Bhat88db86d2015-03-06 10:11:25 +0000609 LIT.transform();
610 DEBUG(dbgs() << "Loops interchanged\n");
611 return true;
612 }
613};
614
615} // end of namespace
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000616bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
David Majnemer0a16c222016-08-11 21:15:00 +0000617 return none_of(Ins->users(), [=](User *U) -> bool {
618 auto *UserIns = dyn_cast<PHINode>(U);
Tyler Nowicki0a913102015-06-16 18:07:34 +0000619 RecurrenceDescriptor RD;
620 return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000621 });
622}
Karthik Bhat88db86d2015-03-06 10:11:25 +0000623
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000624bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
625 BasicBlock *BB) {
Karthik Bhat88db86d2015-03-06 10:11:25 +0000626 for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000627 // Load corresponding to reduction PHI's are safe while concluding if
628 // tightly nested.
629 if (LoadInst *L = dyn_cast<LoadInst>(I)) {
630 if (!areAllUsesReductions(L, InnerLoop))
631 return true;
632 } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
633 return true;
634 }
635 return false;
636}
637
638bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
639 BasicBlock *BB) {
640 for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
641 // Stores corresponding to reductions are safe while concluding if tightly
642 // nested.
643 if (StoreInst *L = dyn_cast<StoreInst>(I)) {
Chad Rosierf7c76f92016-09-21 13:28:41 +0000644 if (!isa<PHINode>(L->getOperand(0)))
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000645 return true;
646 } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
Karthik Bhat88db86d2015-03-06 10:11:25 +0000647 return true;
648 }
649 return false;
650}
651
652bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
653 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
654 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
655 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
656
Chad Rosierf7c76f92016-09-21 13:28:41 +0000657 DEBUG(dbgs() << "Checking if loops are tightly nested\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000658
659 // A perfectly nested loop will not have any branch in between the outer and
660 // inner block i.e. outer header will branch to either inner preheader and
661 // outerloop latch.
Chad Rosierf7c76f92016-09-21 13:28:41 +0000662 BranchInst *OuterLoopHeaderBI =
Karthik Bhat88db86d2015-03-06 10:11:25 +0000663 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
Chad Rosierf7c76f92016-09-21 13:28:41 +0000664 if (!OuterLoopHeaderBI)
Karthik Bhat88db86d2015-03-06 10:11:25 +0000665 return false;
Chad Rosierf7c76f92016-09-21 13:28:41 +0000666
667 for (unsigned i = 0, e = OuterLoopHeaderBI->getNumSuccessors(); i < e; ++i) {
668 if (OuterLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
669 OuterLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
Karthik Bhat88db86d2015-03-06 10:11:25 +0000670 return false;
671 }
672
Chad Rosierf7c76f92016-09-21 13:28:41 +0000673 DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000674 // We do not have any basic block in between now make sure the outer header
Benjamin Kramerdf005cb2015-08-08 18:27:36 +0000675 // and outer loop latch doesn't contain any unsafe instructions.
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000676 if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
677 containsUnsafeInstructionsInLatch(OuterLoopLatch))
Karthik Bhat88db86d2015-03-06 10:11:25 +0000678 return false;
679
Chad Rosierf7c76f92016-09-21 13:28:41 +0000680 DEBUG(dbgs() << "Loops are perfectly nested\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000681 // We have a perfect loop nest.
682 return true;
683}
684
Karthik Bhat88db86d2015-03-06 10:11:25 +0000685
686bool LoopInterchangeLegality::isLoopStructureUnderstood(
687 PHINode *InnerInduction) {
688
689 unsigned Num = InnerInduction->getNumOperands();
690 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
691 for (unsigned i = 0; i < Num; ++i) {
692 Value *Val = InnerInduction->getOperand(i);
693 if (isa<Constant>(Val))
694 continue;
695 Instruction *I = dyn_cast<Instruction>(Val);
696 if (!I)
697 return false;
698 // TODO: Handle triangular loops.
699 // e.g. for(int i=0;i<N;i++)
700 // for(int j=i;j<N;j++)
701 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
702 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
703 InnerLoopPreheader &&
704 !OuterLoop->isLoopInvariant(I)) {
705 return false;
706 }
707 }
708 return true;
709}
710
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000711bool LoopInterchangeLegality::findInductionAndReductions(
712 Loop *L, SmallVector<PHINode *, 8> &Inductions,
713 SmallVector<PHINode *, 8> &Reductions) {
714 if (!L->getLoopLatch() || !L->getLoopPredecessor())
715 return false;
716 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
Tyler Nowicki0a913102015-06-16 18:07:34 +0000717 RecurrenceDescriptor RD;
James Molloy1bbf15c2015-08-27 09:53:00 +0000718 InductionDescriptor ID;
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000719 PHINode *PHI = cast<PHINode>(I);
Elena Demikhovsky376a18b2016-07-24 07:24:54 +0000720 if (InductionDescriptor::isInductionPHI(PHI, L, SE, ID))
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000721 Inductions.push_back(PHI);
Tyler Nowicki0a913102015-06-16 18:07:34 +0000722 else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000723 Reductions.push_back(PHI);
724 else {
725 DEBUG(
726 dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
727 return false;
728 }
729 }
730 return true;
731}
732
733static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
734 for (auto I = Block->begin(); isa<PHINode>(I); ++I) {
735 PHINode *PHI = cast<PHINode>(I);
736 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
737 if (PHI->getNumIncomingValues() > 1)
738 return false;
739 Instruction *Ins = dyn_cast<Instruction>(PHI->getIncomingValue(0));
740 if (!Ins)
741 return false;
742 // Incoming value for lcssa phi's in outer loop exit can only be inner loop
743 // exits lcssa phi else it would not be tightly nested.
744 if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
745 return false;
746 }
747 return true;
748}
749
750static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock,
751 BasicBlock *LoopHeader) {
752 if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) {
753 unsigned Num = BI->getNumSuccessors();
754 assert(Num == 2);
755 for (unsigned i = 0; i < Num; ++i) {
756 if (BI->getSuccessor(i) == LoopHeader)
757 continue;
758 return BI->getSuccessor(i);
759 }
760 }
761 return nullptr;
762}
763
Karthik Bhat88db86d2015-03-06 10:11:25 +0000764// This function indicates the current limitations in the transform as a result
765// of which we do not proceed.
766bool LoopInterchangeLegality::currentLimitations() {
767
768 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
769 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000770 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
771 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000772 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
Karthik Bhat88db86d2015-03-06 10:11:25 +0000773
774 PHINode *InnerInductionVar;
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000775 SmallVector<PHINode *, 8> Inductions;
776 SmallVector<PHINode *, 8> Reductions;
Florian Hahn4eeff392017-07-03 15:32:00 +0000777 if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
778 DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "
779 << "are supported currently.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000780 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
781 "UnsupportedPHIInner",
782 InnerLoop->getStartLoc(),
783 InnerLoop->getHeader())
784 << "Only inner loops with induction or reduction PHI nodes can be"
785 " interchange currently.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000786 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000787 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000788
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000789 // TODO: Currently we handle only loops with 1 induction variable.
790 if (Inductions.size() != 1) {
791 DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
792 << "Failed to interchange due to current limitation\n");
Florian Hahnad993522017-07-15 13:13:19 +0000793 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
794 "MultiInductionInner",
795 InnerLoop->getStartLoc(),
796 InnerLoop->getHeader())
797 << "Only inner loops with 1 induction variable can be "
798 "interchanged currently.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000799 return true;
800 }
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000801 if (Reductions.size() > 0)
802 InnerLoopHasReduction = true;
803
804 InnerInductionVar = Inductions.pop_back_val();
805 Reductions.clear();
Florian Hahn4eeff392017-07-03 15:32:00 +0000806 if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
807 DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "
808 << "are supported currently.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000809 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
810 "UnsupportedPHIOuter",
811 OuterLoop->getStartLoc(),
812 OuterLoop->getHeader())
813 << "Only outer loops with induction or reduction PHI nodes can be"
814 " interchanged currently.");
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000815 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000816 }
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000817
818 // Outer loop cannot have reduction because then loops will not be tightly
819 // nested.
Florian Hahn4eeff392017-07-03 15:32:00 +0000820 if (!Reductions.empty()) {
821 DEBUG(dbgs() << "Outer loops with reductions are not supported "
822 << "currently.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000823 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
824 "ReductionsOuter",
825 OuterLoop->getStartLoc(),
826 OuterLoop->getHeader())
827 << "Outer loops with reductions cannot be interchangeed "
828 "currently.");
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000829 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000830 }
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000831 // TODO: Currently we handle only loops with 1 induction variable.
Florian Hahn4eeff392017-07-03 15:32:00 +0000832 if (Inductions.size() != 1) {
833 DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
834 << "supported currently.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000835 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
836 "MultiIndutionOuter",
837 OuterLoop->getStartLoc(),
838 OuterLoop->getHeader())
839 << "Only outer loops with 1 induction variable can be "
840 "interchanged currently.");
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000841 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000842 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000843
844 // TODO: Triangular loops are not handled for now.
845 if (!isLoopStructureUnderstood(InnerInductionVar)) {
846 DEBUG(dbgs() << "Loop structure not understood by pass\n");
Florian Hahnad993522017-07-15 13:13:19 +0000847 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
848 "UnsupportedStructureInner",
849 InnerLoop->getStartLoc(),
850 InnerLoop->getHeader())
851 << "Inner loop structure not understood currently.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000852 return true;
853 }
854
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000855 // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
856 BasicBlock *LoopExitBlock =
857 getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
Florian Hahn4eeff392017-07-03 15:32:00 +0000858 if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) {
859 DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000860 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
861 "NoLCSSAPHIOuter",
862 OuterLoop->getStartLoc(),
863 OuterLoop->getHeader())
864 << "Only outer loops with LCSSA PHIs can be interchange "
865 "currently.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000866 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000867 }
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000868
869 LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
Florian Hahn4eeff392017-07-03 15:32:00 +0000870 if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) {
871 DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000872 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
873 "NoLCSSAPHIOuterInner",
874 InnerLoop->getStartLoc(),
875 InnerLoop->getHeader())
876 << "Only inner loops with LCSSA PHIs can be interchange "
877 "currently.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000878 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000879 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000880
881 // TODO: Current limitation: Since we split the inner loop latch at the point
882 // were induction variable is incremented (induction.next); We cannot have
883 // more than 1 user of induction.next since it would result in broken code
884 // after split.
885 // e.g.
886 // for(i=0;i<N;i++) {
887 // for(j = 0;j<M;j++) {
888 // A[j+1][i+2] = A[j][i]+k;
889 // }
890 // }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000891 Instruction *InnerIndexVarInc = nullptr;
892 if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
893 InnerIndexVarInc =
894 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
895 else
896 InnerIndexVarInc =
897 dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
898
Florian Hahn4eeff392017-07-03 15:32:00 +0000899 if (!InnerIndexVarInc) {
900 DEBUG(dbgs() << "Did not find an instruction to increment the induction "
901 << "variable.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000902 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
903 "NoIncrementInInner",
904 InnerLoop->getStartLoc(),
905 InnerLoop->getHeader())
906 << "The inner loop does not increment the induction variable.");
Pete Cooper11bd9582015-07-27 18:37:58 +0000907 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000908 }
Pete Cooper11bd9582015-07-27 18:37:58 +0000909
Karthik Bhat88db86d2015-03-06 10:11:25 +0000910 // Since we split the inner loop latch on this induction variable. Make sure
911 // we do not have any instruction between the induction variable and branch
912 // instruction.
913
David Majnemerd7708772016-06-24 04:05:21 +0000914 bool FoundInduction = false;
915 for (const Instruction &I : reverse(*InnerLoopLatch)) {
916 if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I))
Karthik Bhat88db86d2015-03-06 10:11:25 +0000917 continue;
Florian Hahn4eeff392017-07-03 15:32:00 +0000918
Karthik Bhat88db86d2015-03-06 10:11:25 +0000919 // We found an instruction. If this is not induction variable then it is not
920 // safe to split this loop latch.
Florian Hahn4eeff392017-07-03 15:32:00 +0000921 if (!I.isIdenticalTo(InnerIndexVarInc)) {
922 DEBUG(dbgs() << "Found unsupported instructions between induction "
923 << "variable increment and branch.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000924 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
925 "UnsupportedInsBetweenInduction",
926 InnerLoop->getStartLoc(),
927 InnerLoop->getHeader())
928 << "Found unsupported instruction between induction variable "
929 "increment and branch.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000930 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000931 }
David Majnemerd7708772016-06-24 04:05:21 +0000932
933 FoundInduction = true;
934 break;
Karthik Bhat88db86d2015-03-06 10:11:25 +0000935 }
Benjamin Kramerdf005cb2015-08-08 18:27:36 +0000936 // The loop latch ended and we didn't find the induction variable return as
Karthik Bhat88db86d2015-03-06 10:11:25 +0000937 // current limitation.
Florian Hahn4eeff392017-07-03 15:32:00 +0000938 if (!FoundInduction) {
939 DEBUG(dbgs() << "Did not find the induction variable.\n");
Florian Hahnad993522017-07-15 13:13:19 +0000940 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
941 "NoIndutionVariable",
942 InnerLoop->getStartLoc(),
943 InnerLoop->getHeader())
944 << "Did not find the induction variable.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000945 return true;
Florian Hahn4eeff392017-07-03 15:32:00 +0000946 }
Karthik Bhat88db86d2015-03-06 10:11:25 +0000947 return false;
948}
949
950bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
951 unsigned OuterLoopId,
952 CharMatrix &DepMatrix) {
953
954 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
955 DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
Chad Rosier58ede272016-09-14 16:43:19 +0000956 << " and OuterLoopId = " << OuterLoopId
957 << " due to dependence\n");
Florian Hahnad993522017-07-15 13:13:19 +0000958 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
959 "Dependence",
960 InnerLoop->getStartLoc(),
961 InnerLoop->getHeader())
962 << "Cannot interchange loops due to dependences.");
Karthik Bhat88db86d2015-03-06 10:11:25 +0000963 return false;
964 }
965
966 // Create unique Preheaders if we already do not have one.
967 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
968 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
969
970 // Create a unique outer preheader -
971 // 1) If OuterLoop preheader is not present.
972 // 2) If OuterLoop Preheader is same as OuterLoop Header
973 // 3) If OuterLoop Preheader is same as Header of the previous loop.
974 // 4) If OuterLoop Preheader is Entry node.
975 if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
976 isa<PHINode>(OuterLoopPreHeader->begin()) ||
977 !OuterLoopPreHeader->getUniquePredecessor()) {
Justin Bogner843fb202015-12-15 19:40:57 +0000978 OuterLoopPreHeader =
979 InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000980 }
981
982 if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
983 InnerLoopPreHeader == OuterLoop->getHeader()) {
Justin Bogner843fb202015-12-15 19:40:57 +0000984 InnerLoopPreHeader =
985 InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA);
Karthik Bhat88db86d2015-03-06 10:11:25 +0000986 }
987
Karthik Bhat88db86d2015-03-06 10:11:25 +0000988 // TODO: The loops could not be interchanged due to current limitations in the
989 // transform module.
990 if (currentLimitations()) {
991 DEBUG(dbgs() << "Not legal because of current transform limitation\n");
992 return false;
993 }
994
Karthik Bhat8210fdf2015-04-23 04:51:44 +0000995 // Check if the loops are tightly nested.
996 if (!tightlyNested(OuterLoop, InnerLoop)) {
997 DEBUG(dbgs() << "Loops not tightly nested\n");
Florian Hahnad993522017-07-15 13:13:19 +0000998 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
999 "NotTightlyNested",
1000 InnerLoop->getStartLoc(),
1001 InnerLoop->getHeader())
1002 << "Cannot interchange loops because they are not tightly "
1003 "nested.");
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001004 return false;
1005 }
1006
Karthik Bhat88db86d2015-03-06 10:11:25 +00001007 return true;
1008}
1009
1010int LoopInterchangeProfitability::getInstrOrderCost() {
1011 unsigned GoodOrder, BadOrder;
1012 BadOrder = GoodOrder = 0;
1013 for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
1014 BI != BE; ++BI) {
Benjamin Kramer135f7352016-06-26 12:28:59 +00001015 for (Instruction &Ins : **BI) {
Karthik Bhat88db86d2015-03-06 10:11:25 +00001016 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1017 unsigned NumOp = GEP->getNumOperands();
1018 bool FoundInnerInduction = false;
1019 bool FoundOuterInduction = false;
1020 for (unsigned i = 0; i < NumOp; ++i) {
1021 const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1022 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1023 if (!AR)
1024 continue;
1025
1026 // If we find the inner induction after an outer induction e.g.
1027 // for(int i=0;i<N;i++)
1028 // for(int j=0;j<N;j++)
1029 // A[i][j] = A[i-1][j-1]+k;
1030 // then it is a good order.
1031 if (AR->getLoop() == InnerLoop) {
1032 // We found an InnerLoop induction after OuterLoop induction. It is
1033 // a good order.
1034 FoundInnerInduction = true;
1035 if (FoundOuterInduction) {
1036 GoodOrder++;
1037 break;
1038 }
1039 }
1040 // If we find the outer induction after an inner induction e.g.
1041 // for(int i=0;i<N;i++)
1042 // for(int j=0;j<N;j++)
1043 // A[j][i] = A[j-1][i-1]+k;
1044 // then it is a bad order.
1045 if (AR->getLoop() == OuterLoop) {
1046 // We found an OuterLoop induction after InnerLoop induction. It is
1047 // a bad order.
1048 FoundOuterInduction = true;
1049 if (FoundInnerInduction) {
1050 BadOrder++;
1051 break;
1052 }
1053 }
1054 }
1055 }
1056 }
1057 }
1058 return GoodOrder - BadOrder;
1059}
1060
Chad Rosiere6b3a632016-09-14 17:12:30 +00001061static bool isProfitableForVectorization(unsigned InnerLoopId,
1062 unsigned OuterLoopId,
1063 CharMatrix &DepMatrix) {
Karthik Bhat88db86d2015-03-06 10:11:25 +00001064 // TODO: Improve this heuristic to catch more cases.
1065 // If the inner loop is loop independent or doesn't carry any dependency it is
1066 // profitable to move this to outer position.
1067 unsigned Row = DepMatrix.size();
1068 for (unsigned i = 0; i < Row; ++i) {
1069 if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I')
1070 return false;
1071 // TODO: We need to improve this heuristic.
1072 if (DepMatrix[i][OuterLoopId] != '=')
1073 return false;
1074 }
1075 // If outer loop has dependence and inner loop is loop independent then it is
1076 // profitable to interchange to enable parallelism.
1077 return true;
1078}
1079
1080bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1081 unsigned OuterLoopId,
1082 CharMatrix &DepMatrix) {
1083
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00001084 // TODO: Add better profitability checks.
Karthik Bhat88db86d2015-03-06 10:11:25 +00001085 // e.g
1086 // 1) Construct dependency matrix and move the one with no loop carried dep
1087 // inside to enable vectorization.
1088
1089 // This is rough cost estimation algorithm. It counts the good and bad order
1090 // of induction variables in the instruction and allows reordering if number
1091 // of bad orders is more than good.
Chad Rosier72431892016-09-14 17:07:13 +00001092 int Cost = getInstrOrderCost();
Karthik Bhat88db86d2015-03-06 10:11:25 +00001093 DEBUG(dbgs() << "Cost = " << Cost << "\n");
Chad Rosier72431892016-09-14 17:07:13 +00001094 if (Cost < -LoopInterchangeCostThreshold)
Karthik Bhat88db86d2015-03-06 10:11:25 +00001095 return true;
1096
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00001097 // It is not profitable as per current cache profitability model. But check if
Karthik Bhat88db86d2015-03-06 10:11:25 +00001098 // we can move this loop outside to improve parallelism.
Florian Hahnad993522017-07-15 13:13:19 +00001099 if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1100 return true;
1101
1102 ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
1103 "InterchangeNotProfitable",
1104 InnerLoop->getStartLoc(),
1105 InnerLoop->getHeader())
1106 << "Interchanging loops is too costly (cost="
1107 << ore::NV("Cost", Cost) << ", threshold="
1108 << ore::NV("Threshold", LoopInterchangeCostThreshold) <<
1109 ") and it does not improve parallelism.");
1110 return false;
Karthik Bhat88db86d2015-03-06 10:11:25 +00001111}
1112
1113void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1114 Loop *InnerLoop) {
Daniel Jasper6adbd7a2015-03-06 10:39:14 +00001115 for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E;
1116 ++I) {
Karthik Bhat88db86d2015-03-06 10:11:25 +00001117 if (*I == InnerLoop) {
1118 OuterLoop->removeChildLoop(I);
1119 return;
1120 }
1121 }
Benjamin Kramer8ceb3232015-10-25 22:28:27 +00001122 llvm_unreachable("Couldn't find loop");
Karthik Bhat88db86d2015-03-06 10:11:25 +00001123}
Daniel Jasper6adbd7a2015-03-06 10:39:14 +00001124
Karthik Bhat88db86d2015-03-06 10:11:25 +00001125void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
1126 Loop *OuterLoop) {
1127 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1128 if (OuterLoopParent) {
1129 // Remove the loop from its parent loop.
1130 removeChildLoop(OuterLoopParent, OuterLoop);
1131 removeChildLoop(OuterLoop, InnerLoop);
1132 OuterLoopParent->addChildLoop(InnerLoop);
1133 } else {
1134 removeChildLoop(OuterLoop, InnerLoop);
1135 LI->changeTopLevelLoop(OuterLoop, InnerLoop);
1136 }
1137
Andrew Kaylor08c5f1e2015-04-24 17:39:16 +00001138 while (!InnerLoop->empty())
1139 OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));
Karthik Bhat88db86d2015-03-06 10:11:25 +00001140
1141 InnerLoop->addChildLoop(OuterLoop);
1142}
1143
1144bool LoopInterchangeTransform::transform() {
Karthik Bhat88db86d2015-03-06 10:11:25 +00001145 bool Transformed = false;
1146 Instruction *InnerIndexVar;
1147
1148 if (InnerLoop->getSubLoops().size() == 0) {
1149 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1150 DEBUG(dbgs() << "Calling Split Inner Loop\n");
1151 PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1152 if (!InductionPHI) {
1153 DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1154 return false;
1155 }
1156
1157 if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1158 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1159 else
1160 InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1161
1162 //
1163 // Split at the place were the induction variable is
1164 // incremented/decremented.
1165 // TODO: This splitting logic may not work always. Fix this.
1166 splitInnerLoopLatch(InnerIndexVar);
Chad Rosierf7c76f92016-09-21 13:28:41 +00001167 DEBUG(dbgs() << "splitInnerLoopLatch done\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +00001168
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00001169 // Splits the inner loops phi nodes out into a separate basic block.
Karthik Bhat88db86d2015-03-06 10:11:25 +00001170 splitInnerLoopHeader();
Chad Rosierf7c76f92016-09-21 13:28:41 +00001171 DEBUG(dbgs() << "splitInnerLoopHeader done\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +00001172 }
1173
1174 Transformed |= adjustLoopLinks();
1175 if (!Transformed) {
Chad Rosierf7c76f92016-09-21 13:28:41 +00001176 DEBUG(dbgs() << "adjustLoopLinks failed\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +00001177 return false;
1178 }
1179
1180 restructureLoops(InnerLoop, OuterLoop);
1181 return true;
1182}
1183
Benjamin Kramer79442922015-03-06 18:59:14 +00001184void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
Karthik Bhat88db86d2015-03-06 10:11:25 +00001185 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
Karthik Bhat88db86d2015-03-06 10:11:25 +00001186 BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
Benjamin Kramer79442922015-03-06 18:59:14 +00001187 InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
Karthik Bhat88db86d2015-03-06 10:11:25 +00001188}
1189
Karthik Bhat88db86d2015-03-06 10:11:25 +00001190void LoopInterchangeTransform::splitInnerLoopHeader() {
1191
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001192 // Split the inner loop header out. Here make sure that the reduction PHI's
1193 // stay in the innerloop body.
Karthik Bhat88db86d2015-03-06 10:11:25 +00001194 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001195 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1196 if (InnerLoopHasReduction) {
1197 // FIXME: Check if the induction PHI will always be the first PHI.
1198 BasicBlock *New = InnerLoopHeader->splitBasicBlock(
1199 ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split");
1200 if (LI)
1201 if (Loop *L = LI->getLoopFor(InnerLoopHeader))
1202 L->addBasicBlockToLoop(New, *LI);
1203
1204 // Adjust Reduction PHI's in the block.
1205 SmallVector<PHINode *, 8> PHIVec;
1206 for (auto I = New->begin(); isa<PHINode>(I); ++I) {
1207 PHINode *PHI = dyn_cast<PHINode>(I);
1208 Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader);
1209 PHI->replaceAllUsesWith(V);
1210 PHIVec.push_back((PHI));
1211 }
Benjamin Kramer135f7352016-06-26 12:28:59 +00001212 for (PHINode *P : PHIVec) {
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001213 P->eraseFromParent();
1214 }
1215 } else {
1216 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1217 }
Karthik Bhat88db86d2015-03-06 10:11:25 +00001218
1219 DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "
Chad Rosierf7c76f92016-09-21 13:28:41 +00001220 "InnerLoopHeader\n");
Karthik Bhat88db86d2015-03-06 10:11:25 +00001221}
1222
Benjamin Kramer79442922015-03-06 18:59:14 +00001223/// \brief Move all instructions except the terminator from FromBB right before
1224/// InsertBefore
1225static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1226 auto &ToList = InsertBefore->getParent()->getInstList();
1227 auto &FromList = FromBB->getInstList();
1228
Duncan P. N. Exon Smithbe4d8cb2015-10-13 19:26:58 +00001229 ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1230 FromBB->getTerminator()->getIterator());
Benjamin Kramer79442922015-03-06 18:59:14 +00001231}
1232
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001233void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
1234 BasicBlock *OldPred,
1235 BasicBlock *NewPred) {
1236 for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) {
1237 PHINode *PHI = cast<PHINode>(I);
1238 unsigned Num = PHI->getNumIncomingValues();
1239 for (unsigned i = 0; i < Num; ++i) {
1240 if (PHI->getIncomingBlock(i) == OldPred)
1241 PHI->setIncomingBlock(i, NewPred);
1242 }
1243 }
1244}
1245
Karthik Bhat88db86d2015-03-06 10:11:25 +00001246bool LoopInterchangeTransform::adjustLoopBranches() {
1247
1248 DEBUG(dbgs() << "adjustLoopBranches called\n");
1249 // Adjust the loop preheader
1250 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1251 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1252 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1253 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1254 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1255 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1256 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1257 BasicBlock *InnerLoopLatchPredecessor =
1258 InnerLoopLatch->getUniquePredecessor();
1259 BasicBlock *InnerLoopLatchSuccessor;
1260 BasicBlock *OuterLoopLatchSuccessor;
1261
1262 BranchInst *OuterLoopLatchBI =
1263 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1264 BranchInst *InnerLoopLatchBI =
1265 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1266 BranchInst *OuterLoopHeaderBI =
1267 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1268 BranchInst *InnerLoopHeaderBI =
1269 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1270
1271 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1272 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1273 !InnerLoopHeaderBI)
1274 return false;
1275
1276 BranchInst *InnerLoopLatchPredecessorBI =
1277 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1278 BranchInst *OuterLoopPredecessorBI =
1279 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1280
1281 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1282 return false;
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00001283 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1284 if (!InnerLoopHeaderSuccessor)
Karthik Bhat88db86d2015-03-06 10:11:25 +00001285 return false;
1286
1287 // Adjust Loop Preheader and headers
1288
1289 unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors();
1290 for (unsigned i = 0; i < NumSucc; ++i) {
1291 if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader)
1292 OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader);
1293 }
1294
1295 NumSucc = OuterLoopHeaderBI->getNumSuccessors();
1296 for (unsigned i = 0; i < NumSucc; ++i) {
1297 if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
1298 OuterLoopHeaderBI->setSuccessor(i, LoopExit);
1299 else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00001300 OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
Karthik Bhat88db86d2015-03-06 10:11:25 +00001301 }
1302
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001303 // Adjust reduction PHI's now that the incoming block has changed.
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00001304 updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001305 OuterLoopHeader);
1306
Karthik Bhat88db86d2015-03-06 10:11:25 +00001307 BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
1308 InnerLoopHeaderBI->eraseFromParent();
1309
1310 // -------------Adjust loop latches-----------
1311 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1312 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1313 else
1314 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1315
1316 NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors();
1317 for (unsigned i = 0; i < NumSucc; ++i) {
1318 if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch)
1319 InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor);
1320 }
1321
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001322 // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with
1323 // the value and remove this PHI node from inner loop.
1324 SmallVector<PHINode *, 8> LcssaVec;
1325 for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) {
1326 PHINode *LcssaPhi = cast<PHINode>(I);
1327 LcssaVec.push_back(LcssaPhi);
1328 }
Benjamin Kramer135f7352016-06-26 12:28:59 +00001329 for (PHINode *P : LcssaVec) {
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001330 Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
1331 P->replaceAllUsesWith(Incoming);
1332 P->eraseFromParent();
1333 }
1334
Karthik Bhat88db86d2015-03-06 10:11:25 +00001335 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1336 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1337 else
1338 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1339
1340 if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor)
1341 InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor);
1342 else
1343 InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor);
1344
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001345 updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
1346
Karthik Bhat88db86d2015-03-06 10:11:25 +00001347 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) {
1348 OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch);
1349 } else {
1350 OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch);
1351 }
1352
1353 return true;
1354}
1355void LoopInterchangeTransform::adjustLoopPreheaders() {
1356
1357 // We have interchanged the preheaders so we need to interchange the data in
1358 // the preheader as well.
1359 // This is because the content of inner preheader was previously executed
1360 // inside the outer loop.
1361 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1362 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1363 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1364 BranchInst *InnerTermBI =
1365 cast<BranchInst>(InnerLoopPreHeader->getTerminator());
1366
Karthik Bhat88db86d2015-03-06 10:11:25 +00001367 // These instructions should now be executed inside the loop.
1368 // Move instruction into a new block after outer header.
Karthik Bhat8210fdf2015-04-23 04:51:44 +00001369 moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
Karthik Bhat88db86d2015-03-06 10:11:25 +00001370 // These instructions were not executed previously in the loop so move them to
1371 // the older inner loop preheader.
Benjamin Kramer79442922015-03-06 18:59:14 +00001372 moveBBContents(OuterLoopPreHeader, InnerTermBI);
Karthik Bhat88db86d2015-03-06 10:11:25 +00001373}
1374
1375bool LoopInterchangeTransform::adjustLoopLinks() {
1376
1377 // Adjust all branches in the inner and outer loop.
1378 bool Changed = adjustLoopBranches();
1379 if (Changed)
1380 adjustLoopPreheaders();
1381 return Changed;
1382}
1383
1384char LoopInterchange::ID = 0;
1385INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
1386 "Interchanges loops for cache reuse", false, false)
Chandler Carruth7b560d42015-09-09 17:55:00 +00001387INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
Chandler Carruth49c22192016-05-12 22:19:39 +00001388INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
Karthik Bhat88db86d2015-03-06 10:11:25 +00001389INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
Chandler Carruth2f1fd162015-08-17 02:08:17 +00001390INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
Karthik Bhat88db86d2015-03-06 10:11:25 +00001391INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
Easwaran Ramane12c4872016-06-09 19:44:46 +00001392INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
Karthik Bhat88db86d2015-03-06 10:11:25 +00001393INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
Florian Hahnad993522017-07-15 13:13:19 +00001394INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
Karthik Bhat88db86d2015-03-06 10:11:25 +00001395
1396INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
1397 "Interchanges loops for cache reuse", false, false)
1398
1399Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }