//===- GVNSink.cpp - sink expressions into successors --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
///   [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
///   [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                    \           /
///              [ %e = phi i32 %a2, %c2 ]
///              [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking, however, is a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
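///
/// For example, under a use-based numbering the diagram above assigns %a1 and
/// %c1 the same value number: each one's sole use is an xor-with-1 whose
/// result feeds the same PHI, so replacing %a1 with phi(%a1, %c1) leaves
/// every later computation equivalent.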
///
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <unordered_set>

using namespace llvm;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // namespace GVNExpression
} // namespace llvm

namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallPtrSet<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.erase(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }
  SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!Blocks.count((*II)->getParent())) {
        ActiveBlocks.erase((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.erase(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};
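
// A minimal usage sketch (this mirrors how sinkBB drives the iterator later
// in this file):
//
//   LockstepReverseIterator LRI(Preds);
//   while (LRI.isValid()) {
//     ArrayRef<Instruction *> Insts = *LRI; // One instruction per block.
//     // ... analyze Insts as a sinking candidate ...
//     --LRI;
//   }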

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from, and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }
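
  // Worked example: sinking three instructions from both of two predecessors
  // at the price of one extra PHI scores
  //   Cost = 3 * (2 - 1) - 1 * 1 - 0 = 2.
  // Sinking from only two of three original predecessors would also subtract
  // SplitEdgeCost (2), since the edge from the remaining predecessor must be
  // split.
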
  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that would need to be created if we sank a sequence of instructions. It
/// provides a hash function for efficient equality comparisons.
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() {}
  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order, then
    // by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    std::sort(Ops.begin(), Ops.end());
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }
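
  // Because construction from a PHINode sorts the incoming (block, value)
  // pairs by block pointer, two ModelledPHIs built from PHIs whose incoming
  // entries appear in different textual order compare equal and hash
  // identically.
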
  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    std::copy(V.begin(), V.end(), std::back_inserter(Values));
    std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!NewBlocks.count(*BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const {
    return all_of(Values, [&](Value *V) { return V == Values[0]; });
  }
  bool areAllIncomingValuesSameType() const {
    return all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }
  bool areAnyIncomingValuesConstant() const {
    return any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }
  // Hash functor
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }
  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};

template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }
  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }
  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet;

//===----------------------------------------------------------------------===//
// ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    for (auto &U : I->uses())
      op_push_back(U.getUser());
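    // Sort the recorded users so that the expression's hash and equality do
    // not depend on use-list order, which is arbitrary.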
    std::sort(op_begin(), op_end());
  }
  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H =
        hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  ValueTable() : nextValueNumber(1) {}

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge: we only ever check for equivalence between blocks sharing a
  /// single common immediate successor, and when determining if I1 == I2 we
  /// will have already determined that next(I1) == next(I2). This inductive
  /// property allows us to simply return the value number of the next
  /// instruction that defines memory.
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() : VN() {}
  bool run(Function &F) {
    DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;

  bool isInstructionBlacklisted(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (auto &I : *BB) {
      auto *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        return;

      auto MPHI = ModelledPHI(PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try to sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!all_of(PN->incoming_values(),
                  [&](const Value *V) { return V == PN->getIncomingValue(0); }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};

Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
        for (auto *I : Insts)
          I->dump();
        dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
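
  // For example, if four active predecessors yield value numbers
  // {10, 10, 10, 12}, VNums is {10 -> 3, 12 -> 1}: we select VN 10 below and
  // the fourth predecessor drops out of the active set.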
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(),
                       [](const std::pair<uint32_t, unsigned> &I,
                          const std::pair<uint32_t, unsigned> &J) {
                         return I.second < J.second;
                       })
          ->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.erase(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (isInstructionBlacklisted(I))
      return None;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
    NeededPHIs.erase(NewPHI);
    RecomputePHIContents = true;
  }

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];
  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  for (auto *C : ActivePreds)
    Cand.Blocks.push_back(C);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  DEBUG(dbgs() << "GVNSink: running on basic block ";
        BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  std::sort(Preds.begin(), Preds.end());

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  for (auto I = Preds.begin(); I != Preds.end();) {
    if ((*I)->getTerminator()->getNumSuccessors() != 1)
      I = Preds.erase(I);
    else
      ++I;
  }

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }
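
  // Each candidate is cumulative: the one recorded on iteration N describes
  // sinking the final N instructions of every active predecessor, so sorting
  // by decreasing cost picks the most profitable depth.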

  std::stable_sort(
      Candidates.begin(), Candidates.end(),
      [](const SinkingInstructionCandidate &A,
         const SinkingInstructionCandidate &B) { return A > B; });
  DEBUG(dbgs() << " -- Sinking candidates:\n";
        for (auto &C : Candidates)
          dbgs() << "  " << C << "\n";);

  // Pick the top candidate, as long as its cost is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
          dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

//===----------------------------------------------------------------------===//
// Pass machinery / boilerplate
//===----------------------------------------------------------------------===//

class GVNSinkLegacyPass : public FunctionPass {
public:
  static char ID;

  GVNSinkLegacyPass() : FunctionPass(ID) {
    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    GVNSink G;
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};
} // namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<GlobalsAA>();
  return PA;
}

char GVNSinkLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }