blob: 77dc5f5eee7406db85b922ced72c38b38cfbbfb5 [file] [log] [blame]
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +00001//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// This pass removes the computation of provably redundant expressions that have
10// been computed earlier in a previous iteration. It relies on the use of PHIs
11// to identify loop carried dependences. This is scalar replacement for vector
12// types.
13//
14//-----------------------------------------------------------------------------
15// Motivation: Consider the case where we have the following loop structure.
16//
17// Loop:
18// t0 = a[i];
19// t1 = f(t0);
20// t2 = g(t1);
21// ...
22// t3 = a[i+1];
23// t4 = f(t3);
24// t5 = g(t4);
25// t6 = op(t2, t5)
26// cond_branch <Loop>
27//
28// This can be converted to
29// t00 = a[0];
30// t10 = f(t00);
31// t20 = g(t10);
32// Loop:
33// t2 = t20;
34// t3 = a[i+1];
35// t4 = f(t3);
36// t5 = g(t4);
37// t6 = op(t2, t5)
38// t20 = t5
39// cond_branch <Loop>
40//
41// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42// Such a loop comes to this pass in the following form.
43//
44// LoopPreheader:
45// X0 = a[0];
46// Loop:
47// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48// t1 = f(X2) <-- I1
49// t2 = g(t1)
50// ...
51// X1 = a[i+1]
52// t4 = f(X1) <-- I2
53// t5 = g(t4)
54// t6 = op(t2, t5)
55// cond_branch <Loop>
56//
// In this pass, we look for PHIs such as X2 whose incoming values come only
// from the Loop Preheader and over the backedge and, additionally, both these
// values are the results of the same operation in terms of opcode. We call such
// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
// over X1 is carried over only one iteration and so the DepChain is only one
// PHI node long.
63//
64// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65// PHI coming over the backedge (X1). We stop at the first pair of such users
66// I1 (of X2) and I2 (of X1) that meet the following conditions.
67// 1. I1 and I2 are the same operation, but with different operands.
68// 2. X2 and X1 are used at the same operand number in the two instructions.
// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is
//    a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71//
72// We then make the following transformation
73// LoopPreheader:
74// X0 = a[0];
75// Y0 = f(X0);
76// Loop:
77// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78// Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79// t1 = f(X2) <-- Will be removed by DCE.
80// t2 = g(Y2)
81// ...
82// X1 = a[i+1]
83// t4 = f(X1)
84// t5 = g(t4)
85// t6 = op(t2, t5)
86// cond_branch <Loop>
87//
88// We proceed until we cannot find any more such instructions I1 and I2.
89//
90// --- DepChains & Loop carried dependences ---
91// Consider a single basic block loop such as
92//
93// LoopPreheader:
94// X0 = ...
95// Y0 = ...
96// Loop:
97// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98// Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99// ...
100// X1 = ...
101// ...
102// cond_branch <Loop>
103//
104// Then there is a dependence between X2 and X1 that goes back one iteration,
105// i.e. X1 is used as X2 in the very next iteration. We represent this as a
106// DepChain from X2 to X1 (X2->X1).
107// Similarly, there is a dependence between Y2 and X1 that goes back two
108// iterations. X1 is used as Y2 two iterations after it is computed. This is
109// represented by a DepChain as (Y2->X2->X1).
110//
111// A DepChain has the following properties.
// 1. Number of Instructions in DepChain = Number of iterations of carried
//    dependence + 1. The number of edges in the DepChain equals the number of
//    iterations (e.g. Y2->X2->X1 has 3 instructions, 2 edges, 2 iterations).
114// 2. All instructions in the DepChain except the last are PHIs.
115//===----------------------------------------------------------------------===//
116
117#define DEBUG_TYPE "hexagon-vlcr"
118
119#include "llvm/ADT/SetVector.h"
120#include "llvm/ADT/Triple.h"
121#include "llvm/Analysis/LoopPass.h"
122#include "llvm/Transforms/Scalar.h"
123#include "llvm/IR/IRBuilder.h"
124#include "llvm/Support/raw_ostream.h"
125#include "llvm/IR/Instructions.h"
126#include "llvm/IR/IntrinsicInst.h"
127#include "llvm/ADT/Statistic.h"
128#include <set>
129#include <map>
130using namespace llvm;
131
132STATISTIC(HexagonNumVectorLoopCarriedReuse,
133 "Number of values that were reused from a previous iteration.");
134
135static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
136 cl::Hidden,
137 cl::desc("Maximum distance of loop carried dependences that are handled"),
138 cl::init(2), cl::ZeroOrMore);
139namespace llvm {
140 void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
141 Pass *createHexagonVectorLoopCarriedReusePass();
142}
143namespace {
144 // See info about DepChain in the comments at the top of this file.
145 typedef SmallVector<Instruction *, 4> ChainOfDependences;
146 class DepChain {
147 ChainOfDependences Chain;
148 public:
149 bool isIdentical(DepChain &Other) {
150 if (Other.size() != size())
151 return false;
152 ChainOfDependences &OtherChain = Other.getChain();
153 for (int i = 0; i < size(); ++i) {
154 if (Chain[i] != OtherChain[i])
155 return false;
156 }
157 return true;
158 }
159 ChainOfDependences &getChain() {
160 return Chain;
161 }
162 int size() {
163 return Chain.size();
164 }
165 void clear() {
166 Chain.clear();
167 }
168 void push_back(Instruction *I) {
169 Chain.push_back(I);
170 }
171 int iterations() {
172 return size() - 1;
173 }
174 Instruction *front() {
175 return Chain.front();
176 }
177 Instruction *back() {
178 return Chain.back();
179 }
180 Instruction *&operator[](const int index) {
181 return Chain[index];
182 }
183 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
184 };
185
NAKAMURA Takumifec5e102017-09-22 01:01:33 +0000186 LLVM_ATTRIBUTE_UNUSED
NAKAMURA Takumi05f60152017-09-22 01:01:31 +0000187 raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000188 const ChainOfDependences &CD = D.Chain;
189 int ChainSize = CD.size();
190 OS << "**DepChain Start::**\n";
191 for (int i = 0; i < ChainSize -1; ++i) {
192 OS << *(CD[i]) << " -->\n";
193 }
194 OS << *CD[ChainSize-1] << "\n";
195 return OS;
196 }
197}
198namespace {
199 struct ReuseValue {
200 Instruction *Inst2Replace;
201 // In the new PHI node that we'll construct this is the value that'll be
202 // used over the backedge. This is teh value that gets reused from a
203 // previous iteration.
204 Instruction * BackedgeInst;
205 ReuseValue() : Inst2Replace(nullptr), BackedgeInst(nullptr) {};
206 void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
207 bool isDefined() { return Inst2Replace != nullptr; }
208 };
209 typedef struct ReuseValue ReuseValue;
NAKAMURA Takumifec5e102017-09-22 01:01:33 +0000210 LLVM_ATTRIBUTE_UNUSED
NAKAMURA Takumi05f60152017-09-22 01:01:31 +0000211 raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000212 OS << "** ReuseValue ***\n";
213 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
214 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
215 return OS;
216 }
217}
218
219namespace {
220 class HexagonVectorLoopCarriedReuse : public LoopPass {
221 public:
222 static char ID;
223 explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
224 PassRegistry *PR = PassRegistry::getPassRegistry();
225 initializeHexagonVectorLoopCarriedReusePass(*PR);
226 }
227 StringRef getPassName() const override {
228 return "Hexagon-specific loop carried reuse for HVX vectors";
229 }
230
231 void getAnalysisUsage(AnalysisUsage &AU) const override {
232 AU.addRequired<LoopInfoWrapperPass>();
233 AU.addRequiredID(LoopSimplifyID);
234 AU.addRequiredID(LCSSAID);
235 AU.addPreservedID(LCSSAID);
236 AU.setPreservesCFG();
237 }
238
239 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
240
241 private:
242 SetVector<DepChain *> Dependences;
243 std::set<Instruction *> ReplacedInsts;
244 Loop *CurLoop;
245 ReuseValue ReuseCandidate;
246
247 bool doVLCR();
248 void findLoopCarriedDeps();
249 void findValueToReuse();
250 void findDepChainFromPHI(Instruction *I, DepChain &D);
251 void reuseValue();
252 Value *findValueInBlock(Value *Op, BasicBlock *BB);
253 bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
254 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
255 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
256 bool canReplace(Instruction *I);
257
258 };
259}
260
261char HexagonVectorLoopCarriedReuse::ID = 0;
262
263INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
264 "Hexagon-specific predictive commoning for HVX vectors", false, false)
265INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
266INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
267INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
268INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
269 "Hexagon-specific predictive commoning for HVX vectors", false, false)
270
271bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
272 if (skipLoop(L))
273 return false;
274
275 if (!L->getLoopPreheader())
276 return false;
277
278 // Work only on innermost loops.
279 if (L->getSubLoops().size() != 0)
280 return false;
281
282 // Work only on single basic blocks loops.
283 if (L->getNumBlocks() != 1)
284 return false;
285
286 CurLoop = L;
287
288 return doVLCR();
289}
290
291bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
292 Instruction *I2) {
293 if (!I1->isSameOperationAs(I2))
294 return false;
295 // This check is in place specifically for intrinsics. isSameOperationAs will
296 // return two for any two hexagon intrinsics because they are essentially the
297 // same instruciton (CallInst). We need to scratch the surface to see if they
298 // are calls to the same function.
299 if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
300 if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
301 if (C1->getCalledFunction() != C2->getCalledFunction())
302 return false;
303 }
304 }
305 return true;
306}
307
308bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
309 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
310 if (II &&
311 (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
312 II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
313 DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
314 return false;
315 }
316 return true;
317}
318void HexagonVectorLoopCarriedReuse::findValueToReuse() {
319 for (auto *D : Dependences) {
320 DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
321 if (D->iterations() > HexagonVLCRIterationLim) {
322 DEBUG(dbgs() <<
323 ".. Skipping because number of iterations > than the limit\n");
324 continue;
325 }
326
327 PHINode *PN = cast<PHINode>(D->front());
328 Instruction *BEInst = D->back();
329 int Iters = D->iterations();
330 BasicBlock *BB = PN->getParent();
331 DEBUG(dbgs() << "Checking if any uses of " << *PN << " can be reused\n");
332
333 SmallVector<Instruction *, 4> PNUsers;
334 for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
335 Use &U = *UI;
336 Instruction *User = cast<Instruction>(U.getUser());
337
338 if (User->getParent() != BB)
339 continue;
340 if (ReplacedInsts.count(User)) {
341 DEBUG(dbgs() << *User << " has already been replaced. Skipping...\n");
342 continue;
343 }
344 if (isa<PHINode>(User))
345 continue;
346 if (User->mayHaveSideEffects())
347 continue;
348 if (!canReplace(User))
349 continue;
350
351 PNUsers.push_back(User);
352 }
353 DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
354
355 // For each interesting use I of PN, find an Instruction BEUser that
356 // performs the same operation as I on BEInst and whose other operands,
357 // if any, can also be rematerialized in OtherBB. We stop when we find the
358 // first such Instruction BEUser. This is because once BEUser is
359 // rematerialized in OtherBB, we may find more such "fixup" opportunities
360 // in this block. So, we'll start over again.
361 for (Instruction *I : PNUsers) {
362 for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
363 ++UI) {
364 Use &U = *UI;
365 Instruction *BEUser = cast<Instruction>(U.getUser());
366
367 if (BEUser->getParent() != BB)
368 continue;
369 if (!isEquivalentOperation(I, BEUser))
370 continue;
371
372 int NumOperands = I->getNumOperands();
373
374 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
375 Value *Op = I->getOperand(OpNo);
376 Instruction *OpInst = dyn_cast<Instruction>(Op);
377 if (!OpInst)
378 continue;
379
380 Value *BEOp = BEUser->getOperand(OpNo);
381 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
382
383 if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
384 BEUser = nullptr;
385 break;
386 }
387 }
388 if (BEUser) {
389 DEBUG(dbgs() << "Found Value for reuse.\n");
390 ReuseCandidate.Inst2Replace = I;
391 ReuseCandidate.BackedgeInst = BEUser;
392 return;
393 } else
394 ReuseCandidate.reset();
395 }
396 }
397 }
398 ReuseCandidate.reset();
399 return;
400}
401Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
402 BasicBlock *BB) {
403 PHINode *PN = dyn_cast<PHINode>(Op);
404 assert(PN);
405 Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
406 return ValueInBlock;
407}
408void HexagonVectorLoopCarriedReuse::reuseValue() {
409 DEBUG(dbgs() << ReuseCandidate);
410 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
411 Instruction *BEInst = ReuseCandidate.BackedgeInst;
412 int NumOperands = Inst2Replace->getNumOperands();
413 std::map<Instruction *, DepChain *> DepChains;
414 int Iterations = -1;
415 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
416
417 for (int i = 0; i < NumOperands; ++i) {
418 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
419 if(!I)
420 continue;
421 else {
422 Instruction *J = cast<Instruction>(BEInst->getOperand(i));
423 DepChain *D = getDepChainBtwn(I, J);
424
425 assert(D &&
426 "No DepChain between corresponding operands in ReuseCandidate\n");
427 if (Iterations == -1)
428 Iterations = D->iterations();
429 assert(Iterations == D->iterations() && "Iterations mismatch");
430 DepChains[I] = D;
431 }
432 }
433
434 DEBUG(dbgs() << "reuseValue is making the following changes\n");
435
436 SmallVector<Instruction *, 4> InstsInPreheader;
437 for (int i = 0; i < Iterations; ++i) {
438 Instruction *InstInPreheader = Inst2Replace->clone();
439 SmallVector<Value *, 4> Ops;
440 for (int j = 0; j < NumOperands; ++j) {
441 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
442 if (!I)
443 continue;
444 // Get the DepChain corresponding to this operand.
445 DepChain &D = *DepChains[I];
446 // Get the PHI for the iteration number and find
447 // the incoming value from the Loop Preheader for
448 // that PHI.
449 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
450 InstInPreheader->setOperand(j, ValInPreheader);
451 }
452 InstsInPreheader.push_back(InstInPreheader);
453 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
454 InstInPreheader->insertBefore(LoopPH->getTerminator());
455 DEBUG(dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName()
456 << "\n");
457 }
458 BasicBlock *BB = BEInst->getParent();
459 IRBuilder<> IRB(BB);
460 IRB.SetInsertPoint(BB->getFirstNonPHI());
461 Value *BEVal = BEInst;
462 PHINode *NewPhi;
463 for (int i = Iterations-1; i >=0 ; --i) {
464 Instruction *InstInPreheader = InstsInPreheader[i];
465 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
466 NewPhi->addIncoming(InstInPreheader, LoopPH);
467 NewPhi->addIncoming(BEVal, BB);
468 DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n");
469 BEVal = NewPhi;
470 }
471 // We are in LCSSA form. So, a value defined inside the Loop is used only
472 // inside the loop. So, the following is safe.
473 Inst2Replace->replaceAllUsesWith(NewPhi);
474 ReplacedInsts.insert(Inst2Replace);
475 ++HexagonNumVectorLoopCarriedReuse;
476}
477
478bool HexagonVectorLoopCarriedReuse::doVLCR() {
479 assert((CurLoop->getSubLoops().size() == 0) &&
480 "Can do VLCR on the innermost loop only");
481 assert((CurLoop->getNumBlocks() == 1) &&
482 "Can do VLCR only on single block loops");
483
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000484 bool Changed;
485 bool Continue;
486
Richard Trieucc10e632017-09-21 23:48:01 +0000487 DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000488 do {
489 // Reset datastructures.
490 Dependences.clear();
491 Continue = false;
492
493 findLoopCarriedDeps();
494 findValueToReuse();
495 if (ReuseCandidate.isDefined()) {
496 reuseValue();
497 Changed = true;
498 Continue = true;
499 }
500 std::for_each(Dependences.begin(), Dependences.end(),
501 std::default_delete<DepChain>());
502 } while (Continue);
503 return Changed;
504}
505void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
506 DepChain &D) {
507 PHINode *PN = dyn_cast<PHINode>(I);
508 if (!PN) {
509 D.push_back(I);
510 return;
511 } else {
512 auto NumIncomingValues = PN->getNumIncomingValues();
513 if (NumIncomingValues != 2) {
514 D.clear();
515 return;
516 }
517
518 BasicBlock *BB = PN->getParent();
519 if (BB != CurLoop->getHeader()) {
520 D.clear();
521 return;
522 }
523
524 Value *BEVal = PN->getIncomingValueForBlock(BB);
525 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
526 // This is a single block loop with a preheader, so at least
527 // one value should come over the backedge.
528 assert(BEInst && "There should be a value over the backedge");
529
530 Value *PreHdrVal =
531 PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
532 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
533 D.clear();
534 return;
535 }
536 D.push_back(PN);
537 findDepChainFromPHI(BEInst, D);
538 }
539 return;
540}
541
542bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
543 Instruction *I2,
544 int Iters) {
545 for (auto *D : Dependences) {
546 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
547 return true;
548 }
549 return false;
550}
551DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
552 Instruction *I2) {
553 for (auto *D : Dependences) {
554 if (D->front() == I1 && D->back() == I2)
555 return D;
556 }
557 return nullptr;
558}
559void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
560 BasicBlock *BB = CurLoop->getHeader();
561 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
562 auto *PN = cast<PHINode>(I);
563 if (!isa<VectorType>(PN->getType()))
564 continue;
565
566 DepChain *D = new DepChain();
567 findDepChainFromPHI(PN, *D);
568 if (D->size() != 0)
569 Dependences.insert(D);
570 else
571 delete D;
572 }
573 DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
574 DEBUG(for (size_t i = 0; i < Dependences.size(); ++i) {
575 dbgs() << *Dependences[i] << "\n";
576 });
577}
578Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
579 return new HexagonVectorLoopCarriedReuse();
580}