Blame - llvm/lib/Transforms/Scalar/GVNSink.cpp - toolchain/llvm-project

blob: 5c75f39e381dd6109b08fdd49bed8158d342119f [file] [log] [blame]

James Molloy	a929063	2017-05-25 12:51:11 +0000	[diff] [blame]	1	//===- GVNSink.cpp - sink expressions into successors -------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file GVNSink.cpp
				11	/// This pass attempts to sink instructions into successors, reducing static
				12	/// instruction count and enabling if-conversion.
				13	///
				14	/// We use a variant of global value numbering to decide what can be sunk.
				15	/// Consider:
				16	///
				17	/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
				18	/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
				19	/// \ /
				20	/// [ %e = phi i32 %a2, %c2 ]
				21	/// [ add i32 %e, 4 ]
				22	///
				23	///
				24	/// GVN would number %a1 and %c1 differently because they compute different
				25	/// results - the VN of an instruction is a function of its opcode and the
				26	/// transitive closure of its operands. This is the key property for hoisting
				27	/// and CSE.
				28	///
				29	/// What we want when sinking however is for a numbering that is a function of
				30	/// the uses of an instruction, which allows us to answer the question "if I
				31	/// replace %a1 with %c1, will it contribute in an equivalent way to all
				32	/// successive instructions?". The PostValueTable class in GVN provides this
				33	/// mapping.
				34	///
				35	//===----------------------------------------------------------------------===//
				36
				37	#include "llvm/ADT/DenseMap.h"
				38	#include "llvm/ADT/DenseMapInfo.h"
				39	#include "llvm/ADT/DenseSet.h"
				40	#include "llvm/ADT/Hashing.h"
				41	#include "llvm/ADT/Optional.h"
				42	#include "llvm/ADT/PostOrderIterator.h"
				43	#include "llvm/ADT/SCCIterator.h"
				44	#include "llvm/ADT/SmallPtrSet.h"
				45	#include "llvm/ADT/Statistic.h"
				46	#include "llvm/ADT/StringExtras.h"
				47	#include "llvm/Analysis/GlobalsModRef.h"
				48	#include "llvm/Analysis/MemorySSA.h"
				49	#include "llvm/Analysis/PostDominators.h"
				50	#include "llvm/Analysis/TargetTransformInfo.h"
				51	#include "llvm/Analysis/ValueTracking.h"
				52	#include "llvm/IR/Instructions.h"
				53	#include "llvm/IR/Verifier.h"
				54	#include "llvm/Support/MathExtras.h"
				55	#include "llvm/Transforms/Scalar.h"
				56	#include "llvm/Transforms/Scalar/GVN.h"
				57	#include "llvm/Transforms/Scalar/GVNExpression.h"
				58	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
				59	#include "llvm/Transforms/Utils/Local.h"
				60	#include <unordered_set>
				61	using namespace llvm;
				62
				63	#define DEBUG_TYPE "gvn-sink"
				64
				65	STATISTIC(NumRemoved, "Number of instructions removed");
				66
				67	namespace {
				68
				69	static bool isMemoryInst(const Instruction *I) {
				70	return isa<LoadInst>(I) \|\| isa<StoreInst>(I) \|\|
				71	(isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) \|\|
				72	(isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
				73	}
				74
				75	/// Iterates through instructions in a set of blocks in reverse order from the
				76	/// first non-terminator. For example (assume all blocks have size n):
				77	/// LockstepReverseIterator I([B1, B2, B3]);
				78	/// *I-- = [B1[n], B2[n], B3[n]];
				79	/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
				80	/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
				81	/// ...
				82	///
				83	/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
				84	/// to
				85	/// determine which blocks are still going and the order they appear in the
				86	/// list returned by operator*.
				87	class LockstepReverseIterator {
				88	ArrayRef<BasicBlock *> Blocks;
				89	SmallPtrSet<BasicBlock *, 4> ActiveBlocks;
				90	SmallVector<Instruction *, 4> Insts;
				91	bool Fail;
				92
				93	public:
				94	LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
				95	reset();
				96	}
				97
				98	void reset() {
				99	Fail = false;
				100	ActiveBlocks.clear();
				101	for (BasicBlock *BB : Blocks)
				102	ActiveBlocks.insert(BB);
				103	Insts.clear();
				104	for (BasicBlock *BB : Blocks) {
				105	if (BB->size() <= 1) {
				106	// Block wasn't big enough - only contained a terminator.
				107	ActiveBlocks.erase(BB);
				108	continue;
				109	}
				110	Insts.push_back(BB->getTerminator()->getPrevNode());
				111	}
				112	if (Insts.empty())
				113	Fail = true;
				114	}
				115
				116	bool isValid() const { return !Fail; }
				117	ArrayRef<Instruction > operator() const { return Insts; }
				118	SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
				119
				120	void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) {
				121	for (auto II = Insts.begin(); II != Insts.end();) {
				122	if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
				123	Blocks.end()) {
				124	ActiveBlocks.erase((*II)->getParent());
				125	II = Insts.erase(II);
				126	} else {
				127	++II;
				128	}
				129	}
				130	}
				131
				132	void operator--() {
				133	if (Fail)
				134	return;
				135	SmallVector<Instruction *, 4> NewInsts;
				136	for (auto *Inst : Insts) {
				137	if (Inst == &Inst->getParent()->front())
				138	ActiveBlocks.erase(Inst->getParent());
				139	else
				140	NewInsts.push_back(Inst->getPrevNode());
				141	}
				142	if (NewInsts.empty()) {
				143	Fail = true;
				144	return;
				145	}
				146	Insts = NewInsts;
				147	}
				148	};
				149
				150	//===----------------------------------------------------------------------===//
				151
				152	/// Candidate solution for sinking. There may be different ways to
				153	/// sink instructions, differing in the number of instructions sunk,
				154	/// the number of predecessors sunk from and the number of PHIs
				155	/// required.
				156	struct SinkingInstructionCandidate {
				157	unsigned NumBlocks;
				158	unsigned NumInstructions;
				159	unsigned NumPHIs;
				160	unsigned NumMemoryInsts;
				161	int Cost = -1;
				162	SmallVector<BasicBlock *, 4> Blocks;
				163
				164	void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
				165	unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
				166	unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
				167	Cost = (NumInstructions * (NumBlocks - 1)) -
				168	(NumExtraPHIs *
				169	NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
				170	- SplitEdgeCost;
				171	}
				172	bool operator>=(const SinkingInstructionCandidate &Other) const {
				173	return Cost >= Other.Cost;
				174	}
				175	};
				176
James Molloy	2a237f1	2017-05-25 13:11:18 +0000	[diff] [blame]	177	#ifndef NDEBUG
James Molloy	a929063	2017-05-25 12:51:11 +0000	[diff] [blame]	178	llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
				179	const SinkingInstructionCandidate &C) {
				180	OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
				181	<< " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
				182	return OS;
				183	}
James Molloy	2a237f1	2017-05-25 13:11:18 +0000	[diff] [blame]	184	#endif
James Molloy	a929063	2017-05-25 12:51:11 +0000	[diff] [blame]	185
				186	//===----------------------------------------------------------------------===//
				187
				188	/// Describes a PHI node that may or may not exist. These track the PHIs
				189	/// that must be created if we sunk a sequence of instructions. It provides
				190	/// a hash function for efficient equality comparisons.
				191	class ModelledPHI {
				192	SmallVector<Value *, 4> Values;
				193	SmallVector<BasicBlock *, 4> Blocks;
				194
				195	public:
				196	ModelledPHI() {}
				197	ModelledPHI(const PHINode *PN) {
				198	for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
				199	Blocks.push_back(PN->getIncomingBlock(I));
				200	std::sort(Blocks.begin(), Blocks.end());
				201
				202	// This assumes the PHI is already well-formed and there aren't conflicting
				203	// incoming values for the same block.
				204	for (auto *B : Blocks)
				205	Values.push_back(PN->getIncomingValueForBlock(B));
				206	}
				207	/// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
				208	/// without the same ID.
				209	/// \note This is specifically for DenseMapInfo - do not use this!
James Molloy	dc2d64b	2017-05-25 13:14:10 +0000	[diff] [blame^]	210	static ModelledPHI createDummy(size_t ID) {
James Molloy	a929063	2017-05-25 12:51:11 +0000	[diff] [blame]	211	ModelledPHI M;
				212	M.Values.push_back(reinterpret_cast<Value*>(ID));
				213	return M;
				214	}
				215
				216	/// Create a PHI from an array of incoming values and incoming blocks.
				217	template <typename VArray, typename BArray>
				218	ModelledPHI(const VArray &V, const BArray &B) {
				219	std::copy(V.begin(), V.end(), std::back_inserter(Values));
				220	std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
				221	}
				222
				223	/// Create a PHI from [I[OpNum] for I in Insts].
				224	template <typename BArray>
				225	ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
				226	std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
				227	for (auto *I : Insts)
				228	Values.push_back(I->getOperand(OpNum));
				229	}
				230
				231	/// Restrict the PHI's contents down to only \c NewBlocks.
				232	/// \c NewBlocks must be a subset of \c this->Blocks.
				233	void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
				234	auto BI = Blocks.begin();
				235	auto VI = Values.begin();
				236	while (BI != Blocks.end()) {
				237	assert(VI != Values.end());
				238	if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
				239	NewBlocks.end()) {
				240	BI = Blocks.erase(BI);
				241	VI = Values.erase(VI);
				242	} else {
				243	++BI;
				244	++VI;
				245	}
				246	}
				247	assert(Blocks.size() == NewBlocks.size());
				248	}
				249
				250	ArrayRef<Value *> getValues() const { return Values; }
				251
				252	bool areAllIncomingValuesSame() const {
				253	return all_of(Values, [&](Value *V) { return V == Values[0]; });
				254	}
				255	bool areAllIncomingValuesSameType() const {
				256	return all_of(
				257	Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
				258	}
				259	bool areAnyIncomingValuesConstant() const {
				260	return any_of(Values, [&](Value *V) { return isa<Constant>(V); });
				261	}
				262	// Hash functor
				263	unsigned hash() const {
				264	return (unsigned)hash_combine_range(Values.begin(), Values.end());
				265	}
				266	bool operator==(const ModelledPHI &Other) const {
				267	return Values == Other.Values && Blocks == Other.Blocks;
				268	}
				269	};
				270
				271	template <typename ModelledPHI> struct DenseMapInfo {
				272	static inline ModelledPHI &getEmptyKey() {
				273	static ModelledPHI Dummy = ModelledPHI::createDummy(0);
				274	return Dummy;
				275	}
				276	static inline ModelledPHI &getTombstoneKey() {
				277	static ModelledPHI Dummy = ModelledPHI::createDummy(1);
				278	return Dummy;
				279	}
				280	static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
				281	static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
				282	return LHS == RHS;
				283	}
				284	};
				285
				286	typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet;
				287
				288	//===----------------------------------------------------------------------===//
				289	// ValueTable
				290	//===----------------------------------------------------------------------===//
				291	// This is a value number table where the value number is a function of the
				292	// uses of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
				293	// that the program would be equivalent if we replaced A with PHI(A, B).
				294	//===----------------------------------------------------------------------===//
				295
				296	/// A GVN expression describing how an instruction is used. The operands
				297	/// field of BasicExpression is used to store uses, not operands.
				298	///
				299	/// This class also contains fields for discriminators used when determining
				300	/// equivalence of instructions with sideeffects.
				301	class InstructionUseExpr : public GVNExpression::BasicExpression {
				302	unsigned MemoryUseOrder = -1;
				303	bool Volatile = false;
				304
				305	public:
				306	InstructionUseExpr(Instruction I, ArrayRecycler<Value > &R,
				307	BumpPtrAllocator &A)
				308	: GVNExpression::BasicExpression(I->getNumUses()) {
				309	allocateOperands(R, A);
				310	setOpcode(I->getOpcode());
				311	setType(I->getType());
				312
				313	for (auto &U : I->uses())
				314	op_push_back(U.getUser());
				315	std::sort(op_begin(), op_end());
				316	}
				317	void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
				318	void setVolatile(bool V) { Volatile = V; }
				319
				320	virtual hash_code getHashValue() const {
				321	return hash_combine(GVNExpression::BasicExpression::getHashValue(),
				322	MemoryUseOrder, Volatile);
				323	}
				324
				325	template <typename Function> hash_code getHashValue(Function MapFn) {
				326	hash_code H =
				327	hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
				328	for (auto *V : operands())
				329	H = hash_combine(H, MapFn(V));
				330	return H;
				331	}
				332	};
				333
				334	class ValueTable {
				335	DenseMap<Value *, uint32_t> ValueNumbering;
				336	DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
				337	DenseMap<size_t, uint32_t> HashNumbering;
				338	BumpPtrAllocator Allocator;
				339	ArrayRecycler<Value *> Recycler;
				340	uint32_t nextValueNumber;
				341
				342	/// Create an expression for I based on its opcode and its uses. If I
				343	/// touches or reads memory, the expression is also based upon its memory
				344	/// order - see \c getMemoryUseOrder().
				345	InstructionUseExpr createExpr(Instruction I) {
				346	InstructionUseExpr *E =
				347	new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
				348	if (isMemoryInst(I))
				349	E->setMemoryUseOrder(getMemoryUseOrder(I));
				350
				351	if (CmpInst *C = dyn_cast<CmpInst>(I)) {
				352	CmpInst::Predicate Predicate = C->getPredicate();
				353	E->setOpcode((C->getOpcode() << 8) \| Predicate);
				354	}
				355	return E;
				356	}
				357
				358	/// Helper to compute the value number for a memory instruction
				359	/// (LoadInst/StoreInst), including checking the memory ordering and
				360	/// volatility.
				361	template <class Inst> InstructionUseExpr createMemoryExpr(Inst I) {
				362	if (isStrongerThanUnordered(I->getOrdering()) \|\| I->isAtomic())
				363	return nullptr;
				364	InstructionUseExpr *E = createExpr(I);
				365	E->setVolatile(I->isVolatile());
				366	return E;
				367	}
				368
				369	public:
				370	/// Returns the value number for the specified value, assigning
				371	/// it a new number if it did not have one before.
				372	uint32_t lookupOrAdd(Value *V) {
				373	auto VI = ValueNumbering.find(V);
				374	if (VI != ValueNumbering.end())
				375	return VI->second;
				376
				377	if (!isa<Instruction>(V)) {
				378	ValueNumbering[V] = nextValueNumber;
				379	return nextValueNumber++;
				380	}
				381
				382	Instruction *I = cast<Instruction>(V);
				383	InstructionUseExpr *exp = nullptr;
				384	switch (I->getOpcode()) {
				385	case Instruction::Load:
				386	exp = createMemoryExpr(cast<LoadInst>(I));
				387	break;
				388	case Instruction::Store:
				389	exp = createMemoryExpr(cast<StoreInst>(I));
				390	break;
				391	case Instruction::Call:
				392	case Instruction::Invoke:
				393	case Instruction::Add:
				394	case Instruction::FAdd:
				395	case Instruction::Sub:
				396	case Instruction::FSub:
				397	case Instruction::Mul:
				398	case Instruction::FMul:
				399	case Instruction::UDiv:
				400	case Instruction::SDiv:
				401	case Instruction::FDiv:
				402	case Instruction::URem:
				403	case Instruction::SRem:
				404	case Instruction::FRem:
				405	case Instruction::Shl:
				406	case Instruction::LShr:
				407	case Instruction::AShr:
				408	case Instruction::And:
				409	case Instruction::Or:
				410	case Instruction::Xor:
				411	case Instruction::ICmp:
				412	case Instruction::FCmp:
				413	case Instruction::Trunc:
				414	case Instruction::ZExt:
				415	case Instruction::SExt:
				416	case Instruction::FPToUI:
				417	case Instruction::FPToSI:
				418	case Instruction::UIToFP:
				419	case Instruction::SIToFP:
				420	case Instruction::FPTrunc:
				421	case Instruction::FPExt:
				422	case Instruction::PtrToInt:
				423	case Instruction::IntToPtr:
				424	case Instruction::BitCast:
				425	case Instruction::Select:
				426	case Instruction::ExtractElement:
				427	case Instruction::InsertElement:
				428	case Instruction::ShuffleVector:
				429	case Instruction::InsertValue:
				430	case Instruction::GetElementPtr:
				431	exp = createExpr(I);
				432	break;
				433	default:
				434	break;
				435	}
				436
				437	if (!exp) {
				438	ValueNumbering[V] = nextValueNumber;
				439	return nextValueNumber++;
				440	}
				441
				442	uint32_t e = ExpressionNumbering[exp];
				443	if (!e) {
				444	hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
				445	auto I = HashNumbering.find(H);
				446	if (I != HashNumbering.end()) {
				447	e = I->second;
				448	} else {
				449	e = nextValueNumber++;
				450	HashNumbering[H] = e;
				451	ExpressionNumbering[exp] = e;
				452	}
				453	}
				454	ValueNumbering[V] = e;
				455	return e;
				456	}
				457
				458	/// Returns the value number of the specified value. Fails if the value has
				459	/// not yet been numbered.
				460	uint32_t lookup(Value *V) const {
				461	auto VI = ValueNumbering.find(V);
				462	assert(VI != ValueNumbering.end() && "Value not numbered?");
				463	return VI->second;
				464	}
				465
				466	/// Removes all value numberings and resets the value table.
				467	void clear() {
				468	ValueNumbering.clear();
				469	ExpressionNumbering.clear();
				470	HashNumbering.clear();
				471	Recycler.clear(Allocator);
				472	nextValueNumber = 1;
				473	}
				474
				475	ValueTable() : nextValueNumber(1) {}
				476
				477	/// \c Inst uses or touches memory. Return an ID describing the memory state
				478	/// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
				479	/// the exact same memory operations happen after I1 and I2.
				480	///
				481	/// This is a very hard problem in general, so we use domain-specific
				482	/// knowledge that we only ever check for equivalence between blocks sharing a
				483	/// single immediate successor that is common, and when determining if I1 ==
				484	/// I2 we will have already determined that next(I1) == next(I2). This
				485	/// inductive property allows us to simply return the value number of the next
				486	/// instruction that defines memory.
				487	uint32_t getMemoryUseOrder(Instruction *Inst) {
				488	auto *BB = Inst->getParent();
				489	for (auto I = std::next(Inst->getIterator()), E = BB->end();
				490	I != E && !I->isTerminator(); ++I) {
				491	if (!isMemoryInst(&*I))
				492	continue;
				493	if (isa<LoadInst>(&*I))
				494	continue;
				495	CallInst CI = dyn_cast<CallInst>(&I);
				496	if (CI && CI->onlyReadsMemory())
				497	continue;
				498	InvokeInst II = dyn_cast<InvokeInst>(&I);
				499	if (II && II->onlyReadsMemory())
				500	continue;
				501	return lookupOrAdd(&*I);
				502	}
				503	return 0;
				504	}
				505	};
				506
				507	//===----------------------------------------------------------------------===//
				508
				509	class GVNSink {
				510	public:
				511	GVNSink() : VN() {}
				512	bool run(Function &F) {
				513	DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");
				514
				515	unsigned NumSunk = 0;
				516	ReversePostOrderTraversal<Function*> RPOT(&F);
				517	for (auto *N : RPOT)
				518	NumSunk += sinkBB(N);
				519
				520	return NumSunk > 0;
				521	}
				522
				523	private:
				524	ValueTable VN;
				525
				526	bool isInstructionBlacklisted(Instruction *I) {
				527	// These instructions may change or break semantics if moved.
				528	if (isa<PHINode>(I) \|\| I->isEHPad() \|\| isa<AllocaInst>(I) \|\|
				529	I->getType()->isTokenTy())
				530	return true;
				531	return false;
				532	}
				533
				534	/// The main heuristic function. Analyze the set of instructions pointed to by
				535	/// LRI and return a candidate solution if these instructions can be sunk, or
				536	/// None otherwise.
				537	Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
				538	LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
				539	ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
				540
				541	/// Create a ModelledPHI for each PHI in BB, adding to PHIs.
				542	void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
				543	SmallPtrSetImpl<Value *> &PHIContents) {
				544	for (auto &I : *BB) {
				545	auto *PN = dyn_cast<PHINode>(&I);
				546	if (!PN)
				547	return;
				548
				549	auto MPHI = ModelledPHI(PN);
				550	PHIs.insert(MPHI);
				551	for (auto *V : MPHI.getValues())
				552	PHIContents.insert(V);
				553	}
				554	}
				555
				556	/// The main instruction sinking driver. Set up state and try and sink
				557	/// instructions into BBEnd from its predecessors.
				558	unsigned sinkBB(BasicBlock *BBEnd);
				559
				560	/// Perform the actual mechanics of sinking an instruction from Blocks into
				561	/// BBEnd, which is their only successor.
				562	void sinkLastInstruction(ArrayRef<BasicBlock > Blocks, BasicBlock BBEnd);
				563
				564	/// Remove PHIs that all have the same incoming value.
				565	void foldPointlessPHINodes(BasicBlock *BB) {
				566	auto I = BB->begin();
				567	while (PHINode *PN = dyn_cast<PHINode>(I++)) {
				568	if (!all_of(PN->incoming_values(),
				569	[&](const Value *V) { return V == PN->getIncomingValue(0); }))
				570	continue;
				571	if (PN->getIncomingValue(0) != PN)
				572	PN->replaceAllUsesWith(PN->getIncomingValue(0));
				573	else
				574	PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
				575	PN->eraseFromParent();
				576	}
				577	}
				578	};
				579
				580	Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
				581	LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
				582	ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
				583	auto Insts = *LRI;
				584	DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
				585	: Insts) {
				586	I->dump();
				587	} dbgs() << " ]\n";);
				588
				589	DenseMap<uint32_t, unsigned> VNums;
				590	for (auto *I : Insts) {
				591	uint32_t N = VN.lookupOrAdd(I);
				592	DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
				593	if (N == ~0U)
				594	return None;
				595	VNums[N]++;
				596	}
				597	unsigned VNumToSink =
				598	std::max_element(VNums.begin(), VNums.end(),
				599	[](const std::pair<uint32_t, unsigned> &I,
				600	const std::pair<uint32_t, unsigned> &J) {
				601	return I.second < J.second;
				602	})
				603	->first;
				604
				605	if (VNums[VNumToSink] == 1)
				606	// Can't sink anything!
				607	return None;
				608
				609	// Now restrict the number of incoming blocks down to only those with
				610	// VNumToSink.
				611	auto &ActivePreds = LRI.getActiveBlocks();
				612	unsigned InitialActivePredSize = ActivePreds.size();
				613	SmallVector<Instruction *, 4> NewInsts;
				614	for (auto *I : Insts) {
				615	if (VN.lookup(I) != VNumToSink)
				616	ActivePreds.erase(I->getParent());
				617	else
				618	NewInsts.push_back(I);
				619	}
				620	for (auto *I : NewInsts)
				621	if (isInstructionBlacklisted(I))
				622	return None;
				623
				624	// If we've restricted the incoming blocks, restrict all needed PHIs also
				625	// to that set.
				626	bool RecomputePHIContents = false;
				627	if (ActivePreds.size() != InitialActivePredSize) {
				628	ModelledPHISet NewNeededPHIs;
				629	for (auto P : NeededPHIs) {
				630	P.restrictToBlocks(ActivePreds);
				631	NewNeededPHIs.insert(P);
				632	}
				633	NeededPHIs = NewNeededPHIs;
				634	LRI.restrictToBlocks(ActivePreds);
				635	RecomputePHIContents = true;
				636	}
				637
				638	// The sunk instruction's results.
				639	ModelledPHI NewPHI(NewInsts, ActivePreds);
				640
				641	// Does sinking this instruction render previous PHIs redundant?
				642	if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
				643	NeededPHIs.erase(NewPHI);
				644	RecomputePHIContents = true;
				645	}
				646
				647	if (RecomputePHIContents) {
				648	// The needed PHIs have changed, so recompute the set of all needed
				649	// values.
				650	PHIContents.clear();
				651	for (auto &PHI : NeededPHIs)
				652	PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
				653	}
				654
				655	// Is this instruction required by a later PHI that doesn't match this PHI?
				656	// if so, we can't sink this instruction.
				657	for (auto *V : NewPHI.getValues())
				658	if (PHIContents.count(V))
				659	// V exists in this PHI, but the whole PHI is different to NewPHI
				660	// (else it would have been removed earlier). We cannot continue
				661	// because this isn't representable.
				662	return None;
				663
				664	// Which operands need PHIs?
				665	// FIXME: If any of these fail, we should partition up the candidates to
				666	// try and continue making progress.
				667	Instruction *I0 = NewInsts[0];
				668	for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
				669	ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
				670	if (PHI.areAllIncomingValuesSame())
				671	continue;
				672	if (!canReplaceOperandWithVariable(I0, OpNum))
				673	// We can 't create a PHI from this instruction!
				674	return None;
				675	if (NeededPHIs.count(PHI))
				676	continue;
				677	if (!PHI.areAllIncomingValuesSameType())
				678	return None;
				679	// Don't create indirect calls! The called value is the final operand.
				680	if ((isa<CallInst>(I0) \|\| isa<InvokeInst>(I0)) && OpNum == E - 1 &&
				681	PHI.areAnyIncomingValuesConstant())
				682	return None;
				683
				684	NeededPHIs.reserve(NeededPHIs.size());
				685	NeededPHIs.insert(PHI);
				686	PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
				687	}
				688
				689	if (isMemoryInst(NewInsts[0]))
				690	++MemoryInstNum;
				691
				692	SinkingInstructionCandidate Cand;
				693	Cand.NumInstructions = ++InstNum;
				694	Cand.NumMemoryInsts = MemoryInstNum;
				695	Cand.NumBlocks = ActivePreds.size();
				696	Cand.NumPHIs = NeededPHIs.size();
				697	for (auto *C : ActivePreds)
				698	Cand.Blocks.push_back(C);
				699
				700	return Cand;
				701	}
				702
				703	unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
				704	DEBUG(dbgs() << "GVNSink: running on basic block ";
				705	BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
				706	SmallVector<BasicBlock *, 4> Preds;
				707	for (auto *B : predecessors(BBEnd)) {
				708	auto *T = B->getTerminator();
				709	if (isa<BranchInst>(T) \|\| isa<SwitchInst>(T))
				710	Preds.push_back(B);
				711	else
				712	return 0;
				713	}
				714	if (Preds.size() < 2)
				715	return 0;
				716	std::sort(Preds.begin(), Preds.end());
				717
				718	unsigned NumOrigPreds = Preds.size();
				719	// We can only sink instructions through unconditional branches.
				720	for (auto I = Preds.begin(); I != Preds.end();) {
				721	if ((*I)->getTerminator()->getNumSuccessors() != 1)
				722	I = Preds.erase(I);
				723	else
				724	++I;
				725	}
				726
				727	LockstepReverseIterator LRI(Preds);
				728	SmallVector<SinkingInstructionCandidate, 4> Candidates;
				729	unsigned InstNum = 0, MemoryInstNum = 0;
				730	ModelledPHISet NeededPHIs;
				731	SmallPtrSet<Value *, 4> PHIContents;
				732	analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
				733	unsigned NumOrigPHIs = NeededPHIs.size();
				734
				735	while (LRI.isValid()) {
				736	auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
				737	NeededPHIs, PHIContents);
				738	if (!Cand)
				739	break;
				740	Cand->calculateCost(NumOrigPHIs, Preds.size());
				741	Candidates.emplace_back(*Cand);
				742	--LRI;
				743	}
				744
				745	std::stable_sort(
				746	Candidates.begin(), Candidates.end(),
				747	[](const SinkingInstructionCandidate &A,
				748	const SinkingInstructionCandidate &B) { return A >= B; });
				749	DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
				750	: Candidates) dbgs()
				751	<< " " << C << "\n";);
				752
				753	// Pick the top candidate, as long it is positive!
				754	if (Candidates.empty() \|\| Candidates.front().Cost <= 0)
				755	return 0;
				756	auto C = Candidates.front();
				757
				758	DEBUG(dbgs() << " -- Sinking: " << C << "\n");
				759	BasicBlock *InsertBB = BBEnd;
				760	if (C.Blocks.size() < NumOrigPreds) {
				761	DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
				762	dbgs() << "\n");
				763	InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
				764	if (!InsertBB) {
				765	DEBUG(dbgs() << " -- FAILED to split edge!\n");
				766	// Edge couldn't be split.
				767	return 0;
				768	}
				769	}
				770
				771	for (unsigned I = 0; I < C.NumInstructions; ++I)
				772	sinkLastInstruction(C.Blocks, InsertBB);
				773
				774	return C.NumInstructions;
				775	}
				776
				777	void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
				778	BasicBlock *BBEnd) {
				779	SmallVector<Instruction *, 4> Insts;
				780	for (BasicBlock *BB : Blocks)
				781	Insts.push_back(BB->getTerminator()->getPrevNode());
				782	Instruction *I0 = Insts.front();
				783
				784	SmallVector<Value *, 4> NewOperands;
				785	for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
				786	bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
				787	return I->getOperand(O) != I0->getOperand(O);
				788	});
				789	if (!NeedPHI) {
				790	NewOperands.push_back(I0->getOperand(O));
				791	continue;
				792	}
				793
				794	// Create a new PHI in the successor block and populate it.
				795	auto *Op = I0->getOperand(O);
				796	assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
				797	auto *PN = PHINode::Create(Op->getType(), Insts.size(),
				798	Op->getName() + ".sink", &BBEnd->front());
				799	for (auto *I : Insts)
				800	PN->addIncoming(I->getOperand(O), I->getParent());
				801	NewOperands.push_back(PN);
				802	}
				803
				804	// Arbitrarily use I0 as the new "common" instruction; remap its operands
				805	// and move it to the start of the successor block.
				806	for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
				807	I0->getOperandUse(O).set(NewOperands[O]);
				808	I0->moveBefore(&*BBEnd->getFirstInsertionPt());
				809
				810	// Update metadata and IR flags.
				811	for (auto *I : Insts)
				812	if (I != I0) {
				813	combineMetadataForCSE(I0, I);
				814	I0->andIRFlags(I);
				815	}
				816
				817	for (auto *I : Insts)
				818	if (I != I0)
				819	I->replaceAllUsesWith(I0);
				820	foldPointlessPHINodes(BBEnd);
				821
				822	// Finally nuke all instructions apart from the common instruction.
				823	for (auto *I : Insts)
				824	if (I != I0)
				825	I->eraseFromParent();
				826
				827	NumRemoved += Insts.size() - 1;
				828	}
				829
				830	////////////////////////////////////////////////////////////////////////////////
				831	// Pass machinery / boilerplate
				832
				833	class GVNSinkLegacyPass : public FunctionPass {
				834	public:
				835	static char ID;
				836
				837	GVNSinkLegacyPass() : FunctionPass(ID) {
				838	initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
				839	}
				840
				841	bool runOnFunction(Function &F) override {
				842	if (skipFunction(F))
				843	return false;
				844	GVNSink G;
				845	return G.run(F);
				846	}
				847
				848	void getAnalysisUsage(AnalysisUsage &AU) const override {
				849	AU.addPreserved<GlobalsAAWrapperPass>();
				850	}
				851	};
				852	} // namespace
				853
				854	PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
				855	GVNSink G;
				856	if (!G.run(F))
				857	return PreservedAnalyses::all();
				858
				859	PreservedAnalyses PA;
				860	PA.preserve<GlobalsAA>();
				861	return PA;
				862	}
				863
				864	char GVNSinkLegacyPass::ID = 0;
				865	INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
				866	"Early GVN sinking of Expressions", false, false)
				867	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
				868	INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
				869	INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
				870	"Early GVN sinking of Expressions", false, false)
				871
				872	FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }