blob: b3ef3e487280eaeca216dec1d35b3cfa46c422ce [file] [log] [blame]
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +00001//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +00006//
7//===----------------------------------------------------------------------===//
Eugene Zelenko3b873362017-09-28 22:27:31 +00008//
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +00009// This pass removes the computation of provably redundant expressions that have
10// been computed earlier in a previous iteration. It relies on the use of PHIs
11// to identify loop carried dependences. This is scalar replacement for vector
12// types.
13//
14//-----------------------------------------------------------------------------
15// Motivation: Consider the case where we have the following loop structure.
16//
17// Loop:
18// t0 = a[i];
19// t1 = f(t0);
20// t2 = g(t1);
21// ...
22// t3 = a[i+1];
23// t4 = f(t3);
24// t5 = g(t4);
25// t6 = op(t2, t5)
26// cond_branch <Loop>
27//
28// This can be converted to
29// t00 = a[0];
30// t10 = f(t00);
31// t20 = g(t10);
32// Loop:
33// t2 = t20;
34// t3 = a[i+1];
35// t4 = f(t3);
36// t5 = g(t4);
37// t6 = op(t2, t5)
38// t20 = t5
39// cond_branch <Loop>
40//
41// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42// Such a loop comes to this pass in the following form.
43//
44// LoopPreheader:
45// X0 = a[0];
46// Loop:
47// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48// t1 = f(X2) <-- I1
49// t2 = g(t1)
50// ...
51// X1 = a[i+1]
52// t4 = f(X1) <-- I2
53// t5 = g(t4)
54// t6 = op(t2, t5)
55// cond_branch <Loop>
56//
// In this pass, we look for PHIs such as X2 whose incoming values come only
// from the Loop Preheader and over the backedge and additionally, both these
// values are the results of the same operation in terms of opcode. We call such
// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
// over X1 is carried over only one iteration and so the DepChain is only one
// PHI node long.
63//
// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
// PHI coming over the backedge (X1). We stop at the first pair of such users
// I1 (of X2) and I2 (of X1) that meet the following conditions.
// 1. I1 and I2 are the same operation, but with different operands.
// 2. X2 and X1 are used at the same operand number in the two instructions.
// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
//    DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71//
72// We then make the following transformation
73// LoopPreheader:
74// X0 = a[0];
75// Y0 = f(X0);
76// Loop:
77// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78// Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79// t1 = f(X2) <-- Will be removed by DCE.
80// t2 = g(Y2)
81// ...
82// X1 = a[i+1]
83// t4 = f(X1)
84// t5 = g(t4)
85// t6 = op(t2, t5)
86// cond_branch <Loop>
87//
88// We proceed until we cannot find any more such instructions I1 and I2.
89//
90// --- DepChains & Loop carried dependences ---
91// Consider a single basic block loop such as
92//
93// LoopPreheader:
94// X0 = ...
95// Y0 = ...
96// Loop:
97// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98// Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99// ...
100// X1 = ...
101// ...
102// cond_branch <Loop>
103//
104// Then there is a dependence between X2 and X1 that goes back one iteration,
105// i.e. X1 is used as X2 in the very next iteration. We represent this as a
106// DepChain from X2 to X1 (X2->X1).
107// Similarly, there is a dependence between Y2 and X1 that goes back two
108// iterations. X1 is used as Y2 two iterations after it is computed. This is
109// represented by a DepChain as (Y2->X2->X1).
110//
// A DepChain has the following properties.
// 1. Number of instructions in DepChain = Number of edges in DepChain + 1 =
//    Number of iterations of carried dependence + 1.
// 2. All instructions in the DepChain except the last are PHIs.
Eugene Zelenko3b873362017-09-28 22:27:31 +0000115//
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000116//===----------------------------------------------------------------------===//
117
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000118#include "llvm/ADT/SetVector.h"
Eugene Zelenko3b873362017-09-28 22:27:31 +0000119#include "llvm/ADT/SmallVector.h"
120#include "llvm/ADT/Statistic.h"
121#include "llvm/Analysis/LoopInfo.h"
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000122#include "llvm/Analysis/LoopPass.h"
Eugene Zelenko3b873362017-09-28 22:27:31 +0000123#include "llvm/IR/BasicBlock.h"
124#include "llvm/IR/DerivedTypes.h"
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000125#include "llvm/IR/IRBuilder.h"
Eugene Zelenko3b873362017-09-28 22:27:31 +0000126#include "llvm/IR/Instruction.h"
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000127#include "llvm/IR/Instructions.h"
128#include "llvm/IR/IntrinsicInst.h"
Eugene Zelenko3b873362017-09-28 22:27:31 +0000129#include "llvm/IR/Intrinsics.h"
130#include "llvm/IR/Use.h"
131#include "llvm/IR/User.h"
132#include "llvm/IR/Value.h"
133#include "llvm/Pass.h"
134#include "llvm/Support/Casting.h"
135#include "llvm/Support/CommandLine.h"
136#include "llvm/Support/Compiler.h"
137#include "llvm/Support/Debug.h"
138#include "llvm/Support/raw_ostream.h"
139#include "llvm/Transforms/Scalar.h"
David Blaikiea373d182018-03-28 17:44:36 +0000140#include "llvm/Transforms/Utils.h"
Eugene Zelenko3b873362017-09-28 22:27:31 +0000141#include <algorithm>
142#include <cassert>
143#include <cstddef>
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000144#include <map>
Eugene Zelenko3b873362017-09-28 22:27:31 +0000145#include <memory>
146#include <set>
147
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000148using namespace llvm;
149
Eugene Zelenko3b873362017-09-28 22:27:31 +0000150#define DEBUG_TYPE "hexagon-vlcr"
151
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000152STATISTIC(HexagonNumVectorLoopCarriedReuse,
153 "Number of values that were reused from a previous iteration.");
154
155static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
156 cl::Hidden,
157 cl::desc("Maximum distance of loop carried dependences that are handled"),
158 cl::init(2), cl::ZeroOrMore);
Eugene Zelenko3b873362017-09-28 22:27:31 +0000159
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000160namespace llvm {
Eugene Zelenko3b873362017-09-28 22:27:31 +0000161
162void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
163Pass *createHexagonVectorLoopCarriedReusePass();
164
165} // end namespace llvm
166
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000167namespace {
Eugene Zelenko3b873362017-09-28 22:27:31 +0000168
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000169 // See info about DepChain in the comments at the top of this file.
Eugene Zelenko3b873362017-09-28 22:27:31 +0000170 using ChainOfDependences = SmallVector<Instruction *, 4>;
171
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000172 class DepChain {
173 ChainOfDependences Chain;
Eugene Zelenko3b873362017-09-28 22:27:31 +0000174
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000175 public:
Eugene Zelenko3b873362017-09-28 22:27:31 +0000176 bool isIdentical(DepChain &Other) const {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000177 if (Other.size() != size())
178 return false;
179 ChainOfDependences &OtherChain = Other.getChain();
180 for (int i = 0; i < size(); ++i) {
181 if (Chain[i] != OtherChain[i])
182 return false;
183 }
184 return true;
185 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000186
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000187 ChainOfDependences &getChain() {
188 return Chain;
189 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000190
191 int size() const {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000192 return Chain.size();
193 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000194
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000195 void clear() {
196 Chain.clear();
197 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000198
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000199 void push_back(Instruction *I) {
200 Chain.push_back(I);
201 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000202
203 int iterations() const {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000204 return size() - 1;
205 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000206
207 Instruction *front() const {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000208 return Chain.front();
209 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000210
211 Instruction *back() const {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000212 return Chain.back();
213 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000214
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000215 Instruction *&operator[](const int index) {
216 return Chain[index];
217 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000218
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000219 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
220 };
221
NAKAMURA Takumifec5e102017-09-22 01:01:33 +0000222 LLVM_ATTRIBUTE_UNUSED
NAKAMURA Takumi05f60152017-09-22 01:01:31 +0000223 raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000224 const ChainOfDependences &CD = D.Chain;
225 int ChainSize = CD.size();
226 OS << "**DepChain Start::**\n";
227 for (int i = 0; i < ChainSize -1; ++i) {
228 OS << *(CD[i]) << " -->\n";
229 }
230 OS << *CD[ChainSize-1] << "\n";
231 return OS;
232 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000233
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000234 struct ReuseValue {
Eugene Zelenko3b873362017-09-28 22:27:31 +0000235 Instruction *Inst2Replace = nullptr;
236
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000237 // In the new PHI node that we'll construct this is the value that'll be
238 // used over the backedge. This is teh value that gets reused from a
239 // previous iteration.
Eugene Zelenko3b873362017-09-28 22:27:31 +0000240 Instruction *BackedgeInst = nullptr;
241
242 ReuseValue() = default;
243
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000244 void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
245 bool isDefined() { return Inst2Replace != nullptr; }
246 };
Eugene Zelenko3b873362017-09-28 22:27:31 +0000247
NAKAMURA Takumifec5e102017-09-22 01:01:33 +0000248 LLVM_ATTRIBUTE_UNUSED
NAKAMURA Takumi05f60152017-09-22 01:01:31 +0000249 raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000250 OS << "** ReuseValue ***\n";
251 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
252 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
253 return OS;
254 }
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000255
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000256 class HexagonVectorLoopCarriedReuse : public LoopPass {
257 public:
258 static char ID;
Eugene Zelenko3b873362017-09-28 22:27:31 +0000259
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000260 explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
261 PassRegistry *PR = PassRegistry::getPassRegistry();
262 initializeHexagonVectorLoopCarriedReusePass(*PR);
263 }
Eugene Zelenko3b873362017-09-28 22:27:31 +0000264
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000265 StringRef getPassName() const override {
266 return "Hexagon-specific loop carried reuse for HVX vectors";
267 }
268
Eugene Zelenko3b873362017-09-28 22:27:31 +0000269 void getAnalysisUsage(AnalysisUsage &AU) const override {
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000270 AU.addRequired<LoopInfoWrapperPass>();
271 AU.addRequiredID(LoopSimplifyID);
272 AU.addRequiredID(LCSSAID);
273 AU.addPreservedID(LCSSAID);
274 AU.setPreservesCFG();
275 }
276
277 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
278
279 private:
280 SetVector<DepChain *> Dependences;
281 std::set<Instruction *> ReplacedInsts;
282 Loop *CurLoop;
283 ReuseValue ReuseCandidate;
284
285 bool doVLCR();
286 void findLoopCarriedDeps();
287 void findValueToReuse();
288 void findDepChainFromPHI(Instruction *I, DepChain &D);
289 void reuseValue();
290 Value *findValueInBlock(Value *Op, BasicBlock *BB);
291 bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
292 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
293 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
294 bool canReplace(Instruction *I);
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000295 };
Eugene Zelenko3b873362017-09-28 22:27:31 +0000296
297} // end anonymous namespace
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000298
299char HexagonVectorLoopCarriedReuse::ID = 0;
300
301INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
302 "Hexagon-specific predictive commoning for HVX vectors", false, false)
303INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
304INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
305INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
306INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
307 "Hexagon-specific predictive commoning for HVX vectors", false, false)
308
309bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
310 if (skipLoop(L))
311 return false;
312
313 if (!L->getLoopPreheader())
314 return false;
315
316 // Work only on innermost loops.
Eugene Zelenko3b873362017-09-28 22:27:31 +0000317 if (!L->getSubLoops().empty())
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000318 return false;
319
320 // Work only on single basic blocks loops.
321 if (L->getNumBlocks() != 1)
322 return false;
323
324 CurLoop = L;
325
326 return doVLCR();
327}
328
329bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
330 Instruction *I2) {
331 if (!I1->isSameOperationAs(I2))
332 return false;
333 // This check is in place specifically for intrinsics. isSameOperationAs will
334 // return two for any two hexagon intrinsics because they are essentially the
335 // same instruciton (CallInst). We need to scratch the surface to see if they
336 // are calls to the same function.
337 if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
338 if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
339 if (C1->getCalledFunction() != C2->getCalledFunction())
340 return false;
341 }
342 }
Ron Lieberman9bcdd802017-10-02 00:34:07 +0000343
344 // If both the Instructions are of Vector Type and any of the element
345 // is integer constant, check their values too for equivalence.
346 if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
347 unsigned NumOperands = I1->getNumOperands();
348 for (unsigned i = 0; i < NumOperands; ++i) {
349 ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
350 ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
351 if(!C1) continue;
352 assert(C2);
353 if (C1->getSExtValue() != C2->getSExtValue())
354 return false;
355 }
356 }
357
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000358 return true;
359}
360
361bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
362 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
363 if (II &&
364 (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
365 II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000366 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000367 return false;
368 }
369 return true;
370}
371void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372 for (auto *D : Dependences) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000373 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000374 if (D->iterations() > HexagonVLCRIterationLim) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000375 LLVM_DEBUG(
376 dbgs()
377 << ".. Skipping because number of iterations > than the limit\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000378 continue;
379 }
380
381 PHINode *PN = cast<PHINode>(D->front());
382 Instruction *BEInst = D->back();
383 int Iters = D->iterations();
384 BasicBlock *BB = PN->getParent();
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000385 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
386 << " can be reused\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000387
388 SmallVector<Instruction *, 4> PNUsers;
389 for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
390 Use &U = *UI;
391 Instruction *User = cast<Instruction>(U.getUser());
392
393 if (User->getParent() != BB)
394 continue;
395 if (ReplacedInsts.count(User)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000396 LLVM_DEBUG(dbgs() << *User
397 << " has already been replaced. Skipping...\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000398 continue;
399 }
400 if (isa<PHINode>(User))
401 continue;
402 if (User->mayHaveSideEffects())
403 continue;
404 if (!canReplace(User))
405 continue;
406
407 PNUsers.push_back(User);
408 }
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000409 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000410
411 // For each interesting use I of PN, find an Instruction BEUser that
412 // performs the same operation as I on BEInst and whose other operands,
413 // if any, can also be rematerialized in OtherBB. We stop when we find the
414 // first such Instruction BEUser. This is because once BEUser is
415 // rematerialized in OtherBB, we may find more such "fixup" opportunities
416 // in this block. So, we'll start over again.
417 for (Instruction *I : PNUsers) {
418 for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
419 ++UI) {
420 Use &U = *UI;
421 Instruction *BEUser = cast<Instruction>(U.getUser());
422
423 if (BEUser->getParent() != BB)
424 continue;
425 if (!isEquivalentOperation(I, BEUser))
426 continue;
427
428 int NumOperands = I->getNumOperands();
429
430 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
431 Value *Op = I->getOperand(OpNo);
432 Instruction *OpInst = dyn_cast<Instruction>(Op);
433 if (!OpInst)
434 continue;
435
436 Value *BEOp = BEUser->getOperand(OpNo);
437 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
438
439 if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
440 BEUser = nullptr;
441 break;
442 }
443 }
444 if (BEUser) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000445 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000446 ReuseCandidate.Inst2Replace = I;
447 ReuseCandidate.BackedgeInst = BEUser;
448 return;
449 } else
450 ReuseCandidate.reset();
451 }
452 }
453 }
454 ReuseCandidate.reset();
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000455}
Eugene Zelenko3b873362017-09-28 22:27:31 +0000456
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000457Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
458 BasicBlock *BB) {
459 PHINode *PN = dyn_cast<PHINode>(Op);
460 assert(PN);
461 Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
462 return ValueInBlock;
463}
Eugene Zelenko3b873362017-09-28 22:27:31 +0000464
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000465void HexagonVectorLoopCarriedReuse::reuseValue() {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000466 LLVM_DEBUG(dbgs() << ReuseCandidate);
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000467 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
468 Instruction *BEInst = ReuseCandidate.BackedgeInst;
469 int NumOperands = Inst2Replace->getNumOperands();
470 std::map<Instruction *, DepChain *> DepChains;
471 int Iterations = -1;
472 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
473
474 for (int i = 0; i < NumOperands; ++i) {
475 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
476 if(!I)
477 continue;
478 else {
479 Instruction *J = cast<Instruction>(BEInst->getOperand(i));
480 DepChain *D = getDepChainBtwn(I, J);
481
482 assert(D &&
483 "No DepChain between corresponding operands in ReuseCandidate\n");
484 if (Iterations == -1)
485 Iterations = D->iterations();
486 assert(Iterations == D->iterations() && "Iterations mismatch");
487 DepChains[I] = D;
488 }
489 }
490
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000491 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000492
493 SmallVector<Instruction *, 4> InstsInPreheader;
494 for (int i = 0; i < Iterations; ++i) {
495 Instruction *InstInPreheader = Inst2Replace->clone();
496 SmallVector<Value *, 4> Ops;
497 for (int j = 0; j < NumOperands; ++j) {
498 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
499 if (!I)
500 continue;
501 // Get the DepChain corresponding to this operand.
502 DepChain &D = *DepChains[I];
503 // Get the PHI for the iteration number and find
504 // the incoming value from the Loop Preheader for
505 // that PHI.
506 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
507 InstInPreheader->setOperand(j, ValInPreheader);
508 }
509 InstsInPreheader.push_back(InstInPreheader);
510 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
511 InstInPreheader->insertBefore(LoopPH->getTerminator());
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000512 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
513 << LoopPH->getName() << "\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000514 }
515 BasicBlock *BB = BEInst->getParent();
516 IRBuilder<> IRB(BB);
517 IRB.SetInsertPoint(BB->getFirstNonPHI());
518 Value *BEVal = BEInst;
519 PHINode *NewPhi;
520 for (int i = Iterations-1; i >=0 ; --i) {
521 Instruction *InstInPreheader = InstsInPreheader[i];
522 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
523 NewPhi->addIncoming(InstInPreheader, LoopPH);
524 NewPhi->addIncoming(BEVal, BB);
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000525 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
526 << "\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000527 BEVal = NewPhi;
528 }
529 // We are in LCSSA form. So, a value defined inside the Loop is used only
530 // inside the loop. So, the following is safe.
531 Inst2Replace->replaceAllUsesWith(NewPhi);
532 ReplacedInsts.insert(Inst2Replace);
533 ++HexagonNumVectorLoopCarriedReuse;
534}
535
536bool HexagonVectorLoopCarriedReuse::doVLCR() {
Eugene Zelenko3b873362017-09-28 22:27:31 +0000537 assert(CurLoop->getSubLoops().empty() &&
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000538 "Can do VLCR on the innermost loop only");
539 assert((CurLoop->getNumBlocks() == 1) &&
540 "Can do VLCR only on single block loops");
541
Ron Lieberman9bcdd802017-10-02 00:34:07 +0000542 bool Changed = false;
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000543 bool Continue;
544
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000545 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000546 do {
547 // Reset datastructures.
548 Dependences.clear();
549 Continue = false;
550
551 findLoopCarriedDeps();
552 findValueToReuse();
553 if (ReuseCandidate.isDefined()) {
554 reuseValue();
Dimitry Andrice44dea92017-12-18 18:56:00 +0000555 Changed = true;
556 Continue = true;
557 }
558 llvm::for_each(Dependences, std::default_delete<DepChain>());
559 } while (Continue);
560 return Changed;
561}
Eugene Zelenko3b873362017-09-28 22:27:31 +0000562
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000563void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
564 DepChain &D) {
565 PHINode *PN = dyn_cast<PHINode>(I);
566 if (!PN) {
567 D.push_back(I);
568 return;
569 } else {
570 auto NumIncomingValues = PN->getNumIncomingValues();
571 if (NumIncomingValues != 2) {
572 D.clear();
573 return;
574 }
575
576 BasicBlock *BB = PN->getParent();
577 if (BB != CurLoop->getHeader()) {
578 D.clear();
579 return;
580 }
581
582 Value *BEVal = PN->getIncomingValueForBlock(BB);
583 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
584 // This is a single block loop with a preheader, so at least
585 // one value should come over the backedge.
586 assert(BEInst && "There should be a value over the backedge");
587
588 Value *PreHdrVal =
589 PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
590 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
591 D.clear();
592 return;
593 }
594 D.push_back(PN);
595 findDepChainFromPHI(BEInst, D);
596 }
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000597}
598
599bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
600 Instruction *I2,
601 int Iters) {
602 for (auto *D : Dependences) {
603 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
604 return true;
605 }
606 return false;
607}
Eugene Zelenko3b873362017-09-28 22:27:31 +0000608
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000609DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
610 Instruction *I2) {
611 for (auto *D : Dependences) {
612 if (D->front() == I1 && D->back() == I2)
613 return D;
614 }
615 return nullptr;
616}
Eugene Zelenko3b873362017-09-28 22:27:31 +0000617
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000618void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
619 BasicBlock *BB = CurLoop->getHeader();
620 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
621 auto *PN = cast<PHINode>(I);
622 if (!isa<VectorType>(PN->getType()))
623 continue;
624
625 DepChain *D = new DepChain();
626 findDepChainFromPHI(PN, *D);
627 if (D->size() != 0)
628 Dependences.insert(D);
629 else
630 delete D;
631 }
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000632 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
633 LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
634 ++i) { dbgs() << *Dependences[i] << "\n"; });
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000635}
Eugene Zelenko3b873362017-09-28 22:27:31 +0000636
Pranav Bhandarkar931d0b72017-09-21 21:48:23 +0000637Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
638 return new HexagonVectorLoopCarriedReuse();
639}