blob: 18dfd2aced17b07c5871b9c2bfda1c43bf044f2c [file] [log] [blame]
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +00001//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// This pass removes the computation of provably redundant expressions that have
10// been computed earlier in a previous iteration. It relies on the use of PHIs
11// to identify loop carried dependences. This is scalar replacement for vector
12// types.
13//
14//-----------------------------------------------------------------------------
15// Motivation: Consider the case where we have the following loop structure.
16//
17// Loop:
18// t0 = a[i];
19// t1 = f(t0);
20// t2 = g(t1);
21// ...
22// t3 = a[i+1];
23// t4 = f(t3);
24// t5 = g(t4);
25// t6 = op(t2, t5)
26// cond_branch <Loop>
27//
28// This can be converted to
29// t00 = a[0];
30// t10 = f(t00);
31// t20 = g(t10);
32// Loop:
33// t2 = t20;
34// t3 = a[i+1];
35// t4 = f(t3);
36// t5 = g(t4);
37// t6 = op(t2, t5)
38// t20 = t5
39// cond_branch <Loop>
40//
41// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42// Such a loop comes to this pass in the following form.
43//
44// LoopPreheader:
45// X0 = a[0];
46// Loop:
47// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48// t1 = f(X2) <-- I1
49// t2 = g(t1)
50// ...
51// X1 = a[i+1]
52// t4 = f(X1) <-- I2
53// t5 = g(t4)
54// t6 = op(t2, t5)
55// cond_branch <Loop>
56//
57// In this pass, we look for PHIs such as X2 whose incoming values come only
58// from the Loop Preheader and over the backedge and additionaly, both these
59// values are the results of the same operation in terms of opcode. We call such
60// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61// over X1 is carried over only one iteration and so the DepChain is only one
62// PHI node long.
63//
64// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65// PHI coming over the backedge (X1). We stop at the first pair of such users
66// I1 (of X2) and I2 (of X1) that meet the following conditions.
67// 1. I1 and I2 are the same operation, but with different operands.
68// 2. X2 and X1 are used at the same operand number in the two instructions.
69// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
70// a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71//
72// We then make the following transformation
73// LoopPreheader:
74// X0 = a[0];
75// Y0 = f(X0);
76// Loop:
77// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78// Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79// t1 = f(X2) <-- Will be removed by DCE.
80// t2 = g(Y2)
81// ...
82// X1 = a[i+1]
83// t4 = f(X1)
84// t5 = g(t4)
85// t6 = op(t2, t5)
86// cond_branch <Loop>
87//
88// We proceed until we cannot find any more such instructions I1 and I2.
89//
90// --- DepChains & Loop carried dependences ---
91// Consider a single basic block loop such as
92//
93// LoopPreheader:
94// X0 = ...
95// Y0 = ...
96// Loop:
97// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98// Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99// ...
100// X1 = ...
101// ...
102// cond_branch <Loop>
103//
104// Then there is a dependence between X2 and X1 that goes back one iteration,
105// i.e. X1 is used as X2 in the very next iteration. We represent this as a
106// DepChain from X2 to X1 (X2->X1).
107// Similarly, there is a dependence between Y2 and X1 that goes back two
108// iterations. X1 is used as Y2 two iterations after it is computed. This is
109// represented by a DepChain as (Y2->X2->X1).
110//
111// A DepChain has the following properties.
112// 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of
113// iterations of carried dependence + 1.
114// 2. All instructions in the DepChain except the last are PHIs.
115//===----------------------------------------------------------------------===//
116
117#define DEBUG_TYPE "hexagon-vlcr"
118
119#include "llvm/ADT/SetVector.h"
120#include "llvm/ADT/Triple.h"
121#include "llvm/Analysis/LoopPass.h"
122#include "llvm/Transforms/Scalar.h"
123#include "llvm/IR/IRBuilder.h"
124#include "llvm/Support/raw_ostream.h"
125#include "llvm/IR/Instructions.h"
126#include "llvm/IR/IntrinsicInst.h"
127#include "llvm/ADT/Statistic.h"
128#include <set>
129#include <map>
130using namespace llvm;
131
132STATISTIC(HexagonNumVectorLoopCarriedReuse,
133 "Number of values that were reused from a previous iteration.");
134
135static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
136 cl::Hidden,
137 cl::desc("Maximum distance of loop carried dependences that are handled"),
138 cl::init(2), cl::ZeroOrMore);
139namespace llvm {
140 void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
141 Pass *createHexagonVectorLoopCarriedReusePass();
142}
143namespace {
144 // See info about DepChain in the comments at the top of this file.
145 typedef SmallVector<Instruction *, 4> ChainOfDependences;
146 class DepChain {
147 ChainOfDependences Chain;
148 public:
149 bool isIdentical(DepChain &Other) {
150 if (Other.size() != size())
151 return false;
152 ChainOfDependences &OtherChain = Other.getChain();
153 for (int i = 0; i < size(); ++i) {
154 if (Chain[i] != OtherChain[i])
155 return false;
156 }
157 return true;
158 }
159 ChainOfDependences &getChain() {
160 return Chain;
161 }
162 int size() {
163 return Chain.size();
164 }
165 void clear() {
166 Chain.clear();
167 }
168 void push_back(Instruction *I) {
169 Chain.push_back(I);
170 }
171 int iterations() {
172 return size() - 1;
173 }
174 Instruction *front() {
175 return Chain.front();
176 }
177 Instruction *back() {
178 return Chain.back();
179 }
180 Instruction *&operator[](const int index) {
181 return Chain[index];
182 }
183 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
184 };
185
NAKAMURA Takumi05f60152017-09-22 01:01:31 +0000186 raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000187 const ChainOfDependences &CD = D.Chain;
188 int ChainSize = CD.size();
189 OS << "**DepChain Start::**\n";
190 for (int i = 0; i < ChainSize -1; ++i) {
191 OS << *(CD[i]) << " -->\n";
192 }
193 OS << *CD[ChainSize-1] << "\n";
194 return OS;
195 }
196}
197namespace {
198 struct ReuseValue {
199 Instruction *Inst2Replace;
200 // In the new PHI node that we'll construct this is the value that'll be
201 // used over the backedge. This is teh value that gets reused from a
202 // previous iteration.
203 Instruction * BackedgeInst;
204 ReuseValue() : Inst2Replace(nullptr), BackedgeInst(nullptr) {};
205 void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
206 bool isDefined() { return Inst2Replace != nullptr; }
207 };
208 typedef struct ReuseValue ReuseValue;
NAKAMURA Takumi05f60152017-09-22 01:01:31 +0000209 raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000210 OS << "** ReuseValue ***\n";
211 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
212 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
213 return OS;
214 }
215}
216
217namespace {
218 class HexagonVectorLoopCarriedReuse : public LoopPass {
219 public:
220 static char ID;
221 explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
222 PassRegistry *PR = PassRegistry::getPassRegistry();
223 initializeHexagonVectorLoopCarriedReusePass(*PR);
224 }
225 StringRef getPassName() const override {
226 return "Hexagon-specific loop carried reuse for HVX vectors";
227 }
228
229 void getAnalysisUsage(AnalysisUsage &AU) const override {
230 AU.addRequired<LoopInfoWrapperPass>();
231 AU.addRequiredID(LoopSimplifyID);
232 AU.addRequiredID(LCSSAID);
233 AU.addPreservedID(LCSSAID);
234 AU.setPreservesCFG();
235 }
236
237 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
238
239 private:
240 SetVector<DepChain *> Dependences;
241 std::set<Instruction *> ReplacedInsts;
242 Loop *CurLoop;
243 ReuseValue ReuseCandidate;
244
245 bool doVLCR();
246 void findLoopCarriedDeps();
247 void findValueToReuse();
248 void findDepChainFromPHI(Instruction *I, DepChain &D);
249 void reuseValue();
250 Value *findValueInBlock(Value *Op, BasicBlock *BB);
251 bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
252 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
253 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
254 bool canReplace(Instruction *I);
255
256 };
257}
258
259char HexagonVectorLoopCarriedReuse::ID = 0;
260
261INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
262 "Hexagon-specific predictive commoning for HVX vectors", false, false)
263INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
264INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
265INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
266INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
267 "Hexagon-specific predictive commoning for HVX vectors", false, false)
268
269bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
270 if (skipLoop(L))
271 return false;
272
273 if (!L->getLoopPreheader())
274 return false;
275
276 // Work only on innermost loops.
277 if (L->getSubLoops().size() != 0)
278 return false;
279
280 // Work only on single basic blocks loops.
281 if (L->getNumBlocks() != 1)
282 return false;
283
284 CurLoop = L;
285
286 return doVLCR();
287}
288
289bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
290 Instruction *I2) {
291 if (!I1->isSameOperationAs(I2))
292 return false;
293 // This check is in place specifically for intrinsics. isSameOperationAs will
294 // return two for any two hexagon intrinsics because they are essentially the
295 // same instruciton (CallInst). We need to scratch the surface to see if they
296 // are calls to the same function.
297 if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
298 if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
299 if (C1->getCalledFunction() != C2->getCalledFunction())
300 return false;
301 }
302 }
303 return true;
304}
305
306bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
307 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
308 if (II &&
309 (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
310 II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
311 DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
312 return false;
313 }
314 return true;
315}
316void HexagonVectorLoopCarriedReuse::findValueToReuse() {
317 for (auto *D : Dependences) {
318 DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
319 if (D->iterations() > HexagonVLCRIterationLim) {
320 DEBUG(dbgs() <<
321 ".. Skipping because number of iterations > than the limit\n");
322 continue;
323 }
324
325 PHINode *PN = cast<PHINode>(D->front());
326 Instruction *BEInst = D->back();
327 int Iters = D->iterations();
328 BasicBlock *BB = PN->getParent();
329 DEBUG(dbgs() << "Checking if any uses of " << *PN << " can be reused\n");
330
331 SmallVector<Instruction *, 4> PNUsers;
332 for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
333 Use &U = *UI;
334 Instruction *User = cast<Instruction>(U.getUser());
335
336 if (User->getParent() != BB)
337 continue;
338 if (ReplacedInsts.count(User)) {
339 DEBUG(dbgs() << *User << " has already been replaced. Skipping...\n");
340 continue;
341 }
342 if (isa<PHINode>(User))
343 continue;
344 if (User->mayHaveSideEffects())
345 continue;
346 if (!canReplace(User))
347 continue;
348
349 PNUsers.push_back(User);
350 }
351 DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
352
353 // For each interesting use I of PN, find an Instruction BEUser that
354 // performs the same operation as I on BEInst and whose other operands,
355 // if any, can also be rematerialized in OtherBB. We stop when we find the
356 // first such Instruction BEUser. This is because once BEUser is
357 // rematerialized in OtherBB, we may find more such "fixup" opportunities
358 // in this block. So, we'll start over again.
359 for (Instruction *I : PNUsers) {
360 for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
361 ++UI) {
362 Use &U = *UI;
363 Instruction *BEUser = cast<Instruction>(U.getUser());
364
365 if (BEUser->getParent() != BB)
366 continue;
367 if (!isEquivalentOperation(I, BEUser))
368 continue;
369
370 int NumOperands = I->getNumOperands();
371
372 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
373 Value *Op = I->getOperand(OpNo);
374 Instruction *OpInst = dyn_cast<Instruction>(Op);
375 if (!OpInst)
376 continue;
377
378 Value *BEOp = BEUser->getOperand(OpNo);
379 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
380
381 if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
382 BEUser = nullptr;
383 break;
384 }
385 }
386 if (BEUser) {
387 DEBUG(dbgs() << "Found Value for reuse.\n");
388 ReuseCandidate.Inst2Replace = I;
389 ReuseCandidate.BackedgeInst = BEUser;
390 return;
391 } else
392 ReuseCandidate.reset();
393 }
394 }
395 }
396 ReuseCandidate.reset();
397 return;
398}
399Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
400 BasicBlock *BB) {
401 PHINode *PN = dyn_cast<PHINode>(Op);
402 assert(PN);
403 Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
404 return ValueInBlock;
405}
406void HexagonVectorLoopCarriedReuse::reuseValue() {
407 DEBUG(dbgs() << ReuseCandidate);
408 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
409 Instruction *BEInst = ReuseCandidate.BackedgeInst;
410 int NumOperands = Inst2Replace->getNumOperands();
411 std::map<Instruction *, DepChain *> DepChains;
412 int Iterations = -1;
413 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
414
415 for (int i = 0; i < NumOperands; ++i) {
416 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
417 if(!I)
418 continue;
419 else {
420 Instruction *J = cast<Instruction>(BEInst->getOperand(i));
421 DepChain *D = getDepChainBtwn(I, J);
422
423 assert(D &&
424 "No DepChain between corresponding operands in ReuseCandidate\n");
425 if (Iterations == -1)
426 Iterations = D->iterations();
427 assert(Iterations == D->iterations() && "Iterations mismatch");
428 DepChains[I] = D;
429 }
430 }
431
432 DEBUG(dbgs() << "reuseValue is making the following changes\n");
433
434 SmallVector<Instruction *, 4> InstsInPreheader;
435 for (int i = 0; i < Iterations; ++i) {
436 Instruction *InstInPreheader = Inst2Replace->clone();
437 SmallVector<Value *, 4> Ops;
438 for (int j = 0; j < NumOperands; ++j) {
439 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
440 if (!I)
441 continue;
442 // Get the DepChain corresponding to this operand.
443 DepChain &D = *DepChains[I];
444 // Get the PHI for the iteration number and find
445 // the incoming value from the Loop Preheader for
446 // that PHI.
447 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
448 InstInPreheader->setOperand(j, ValInPreheader);
449 }
450 InstsInPreheader.push_back(InstInPreheader);
451 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
452 InstInPreheader->insertBefore(LoopPH->getTerminator());
453 DEBUG(dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName()
454 << "\n");
455 }
456 BasicBlock *BB = BEInst->getParent();
457 IRBuilder<> IRB(BB);
458 IRB.SetInsertPoint(BB->getFirstNonPHI());
459 Value *BEVal = BEInst;
460 PHINode *NewPhi;
461 for (int i = Iterations-1; i >=0 ; --i) {
462 Instruction *InstInPreheader = InstsInPreheader[i];
463 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
464 NewPhi->addIncoming(InstInPreheader, LoopPH);
465 NewPhi->addIncoming(BEVal, BB);
466 DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n");
467 BEVal = NewPhi;
468 }
469 // We are in LCSSA form. So, a value defined inside the Loop is used only
470 // inside the loop. So, the following is safe.
471 Inst2Replace->replaceAllUsesWith(NewPhi);
472 ReplacedInsts.insert(Inst2Replace);
473 ++HexagonNumVectorLoopCarriedReuse;
474}
475
476bool HexagonVectorLoopCarriedReuse::doVLCR() {
477 assert((CurLoop->getSubLoops().size() == 0) &&
478 "Can do VLCR on the innermost loop only");
479 assert((CurLoop->getNumBlocks() == 1) &&
480 "Can do VLCR only on single block loops");
481
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000482 bool Changed;
483 bool Continue;
484
Richard Trieucc10e632017-09-21 23:48:01 +0000485 DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000486 do {
487 // Reset datastructures.
488 Dependences.clear();
489 Continue = false;
490
491 findLoopCarriedDeps();
492 findValueToReuse();
493 if (ReuseCandidate.isDefined()) {
494 reuseValue();
495 Changed = true;
496 Continue = true;
497 }
498 std::for_each(Dependences.begin(), Dependences.end(),
499 std::default_delete<DepChain>());
500 } while (Continue);
501 return Changed;
502}
503void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
504 DepChain &D) {
505 PHINode *PN = dyn_cast<PHINode>(I);
506 if (!PN) {
507 D.push_back(I);
508 return;
509 } else {
510 auto NumIncomingValues = PN->getNumIncomingValues();
511 if (NumIncomingValues != 2) {
512 D.clear();
513 return;
514 }
515
516 BasicBlock *BB = PN->getParent();
517 if (BB != CurLoop->getHeader()) {
518 D.clear();
519 return;
520 }
521
522 Value *BEVal = PN->getIncomingValueForBlock(BB);
523 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
524 // This is a single block loop with a preheader, so at least
525 // one value should come over the backedge.
526 assert(BEInst && "There should be a value over the backedge");
527
528 Value *PreHdrVal =
529 PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
530 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
531 D.clear();
532 return;
533 }
534 D.push_back(PN);
535 findDepChainFromPHI(BEInst, D);
536 }
537 return;
538}
539
540bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
541 Instruction *I2,
542 int Iters) {
543 for (auto *D : Dependences) {
544 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
545 return true;
546 }
547 return false;
548}
549DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
550 Instruction *I2) {
551 for (auto *D : Dependences) {
552 if (D->front() == I1 && D->back() == I2)
553 return D;
554 }
555 return nullptr;
556}
557void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
558 BasicBlock *BB = CurLoop->getHeader();
559 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
560 auto *PN = cast<PHINode>(I);
561 if (!isa<VectorType>(PN->getType()))
562 continue;
563
564 DepChain *D = new DepChain();
565 findDepChainFromPHI(PN, *D);
566 if (D->size() != 0)
567 Dependences.insert(D);
568 else
569 delete D;
570 }
571 DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
572 DEBUG(for (size_t i = 0; i < Dependences.size(); ++i) {
573 dbgs() << *Dependences[i] << "\n";
574 });
575}
576Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
577 return new HexagonVectorLoopCarriedReuse();
578}