Blame - llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp - toolchain/llvm-project

blob: c0f75ddddbe0f556ad7fe2b9d460dc11586def51 [file] [log] [blame]

Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	1	//===- SpeculateAroundPHIs.cpp --------------------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
				11	#include "llvm/ADT/PostOrderIterator.h"
				12	#include "llvm/ADT/Sequence.h"
				13	#include "llvm/ADT/SetVector.h"
				14	#include "llvm/ADT/Statistic.h"
				15	#include "llvm/Analysis/TargetTransformInfo.h"
				16	#include "llvm/Analysis/ValueTracking.h"
				17	#include "llvm/IR/BasicBlock.h"
				18	#include "llvm/IR/IRBuilder.h"
				19	#include "llvm/IR/Instructions.h"
				20	#include "llvm/IR/IntrinsicInst.h"
				21	#include "llvm/Support/Debug.h"
				22	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
				23
				24	using namespace llvm;
				25
				26	#define DEBUG_TYPE "spec-phis"
				27
				28	STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
				29	STATISTIC(NumEdgesSplit,
				30	"Number of critical edges which were split for speculation");
				31	STATISTIC(NumSpeculatedInstructions,
				32	"Number of instructions we speculated around the PHI nodes");
				33	STATISTIC(NumNewRedundantInstructions,
				34	"Number of new, redundant instructions inserted");
				35
Nico Weber	e75fd1b	2018-08-27 14:25:22 +0000	[diff] [blame]	36	/// Check whether speculating the users of a PHI node around the PHI
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	37	/// will be safe.
				38	///
				39	/// This checks both that all of the users are safe and also that all of their
				40	/// operands are either recursively safe or already available along an incoming
				41	/// edge to the PHI.
				42	///
				43	/// This routine caches both all the safe nodes explored in `PotentialSpecSet`
				44	/// and the chain of nodes that definitively reach any unsafe node in
				45	/// `UnsafeSet`. By preserving these between repeated calls to this routine for
				46	/// PHIs in the same basic block, the exploration here can be reused. However,
				47	/// these caches must no be reused for PHIs in a different basic block as they
				48	/// reflect what is available along incoming edges.
				49	static bool
				50	isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
				51	SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
				52	SmallPtrSetImpl<Instruction *> &UnsafeSet) {
				53	auto *PhiBB = PN.getParent();
				54	SmallPtrSet<Instruction *, 4> Visited;
				55	SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
				56
				57	// Walk each user of the PHI node.
				58	for (Use &U : PN.uses()) {
				59	auto *UI = cast<Instruction>(U.getUser());
				60
				61	// Ensure the use post-dominates the PHI node. This ensures that, in the
				62	// absence of unwinding, the use will actually be reached.
				63	// FIXME: We use a blunt hammer of requiring them to be in the same basic
				64	// block. We should consider using actual post-dominance here in the
				65	// future.
				66	if (UI->getParent() != PhiBB) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	67	LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	68	return false;
				69	}
				70
				71	// FIXME: This check is much too conservative. We're not going to move these
				72	// instructions onto new dynamic paths through the program unless there is
				73	// a call instruction between the use and the PHI node. And memory isn't
				74	// changing unless there is a store in that same sequence. We should
				75	// probably change this to do at least a limited scan of the intervening
				76	// instructions and allow handling stores in easily proven safe cases.
				77	if (mayBeMemoryDependent(*UI)) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	78	LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	79	return false;
				80	}
				81
				82	// Now do a depth-first search of everything these users depend on to make
				83	// sure they are transitively safe. This is a depth-first search, but we
				84	// check nodes in preorder to minimize the amount of checking.
				85	Visited.insert(UI);
				86	DFSStack.push_back({UI, UI->value_op_begin()});
				87	do {
				88	User::value_op_iterator OpIt;
				89	std::tie(UI, OpIt) = DFSStack.pop_back_val();
				90
				91	while (OpIt != UI->value_op_end()) {
				92	auto OpI = dyn_cast<Instruction>(OpIt);
				93	// Increment to the next operand for whenever we continue.
				94	++OpIt;
				95	// No need to visit non-instructions, which can't form dependencies.
				96	if (!OpI)
				97	continue;
				98
				99	// Now do the main pre-order checks that this operand is a viable
				100	// dependency of something we want to speculate.
				101
				102	// First do a few checks for instructions that won't require
				103	// speculation at all because they are trivially available on the
				104	// incoming edge (either through dominance or through an incoming value
				105	// to a PHI).
				106	//
				107	// The cases in the current block will be trivially dominated by the
				108	// edge.
				109	auto *ParentBB = OpI->getParent();
				110	if (ParentBB == PhiBB) {
				111	if (isa<PHINode>(OpI)) {
				112	// We can trivially map through phi nodes in the same block.
				113	continue;
				114	}
				115	} else if (DT.dominates(ParentBB, PhiBB)) {
				116	// Instructions from dominating blocks are already available.
				117	continue;
				118	}
				119
				120	// Once we know that we're considering speculating the operand, check
				121	// if we've already explored this subgraph and found it to be safe.
				122	if (PotentialSpecSet.count(OpI))
				123	continue;
				124
				125	// If we've already explored this subgraph and found it unsafe, bail.
				126	// If when we directly test whether this is safe it fails, bail.
				127	if (UnsafeSet.count(OpI) \|\| ParentBB != PhiBB \|\|
				128	mayBeMemoryDependent(*OpI)) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	129	LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: "
				130	<< *OpI << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	131	// Record the stack of instructions which reach this node as unsafe
				132	// so we prune subsequent searches.
				133	UnsafeSet.insert(OpI);
				134	for (auto &StackPair : DFSStack) {
				135	Instruction *I = StackPair.first;
				136	UnsafeSet.insert(I);
				137	}
				138	return false;
				139	}
				140
				141	// Skip any operands we're already recursively checking.
				142	if (!Visited.insert(OpI).second)
				143	continue;
				144
				145	// Push onto the stack and descend. We can directly continue this
				146	// loop when ascending.
				147	DFSStack.push_back({UI, OpIt});
				148	UI = OpI;
				149	OpIt = OpI->value_op_begin();
				150	}
				151
				152	// This node and all its operands are safe. Go ahead and cache that for
				153	// reuse later.
				154	PotentialSpecSet.insert(UI);
				155
				156	// Continue with the next node on the stack.
				157	} while (!DFSStack.empty());
				158	}
				159
				160	#ifndef NDEBUG
				161	// Every visited operand should have been marked as safe for speculation at
				162	// this point. Verify this and return success.
				163	for (auto *I : Visited)
				164	assert(PotentialSpecSet.count(I) &&
				165	"Failed to mark a visited instruction as safe!");
				166	#endif
				167	return true;
				168	}
				169
				170	/// Check whether, in isolation, a given PHI node is both safe and profitable
				171	/// to speculate users around.
				172	///
				173	/// This handles checking whether there are any constant operands to a PHI
				174	/// which could represent a useful speculation candidate, whether the users of
				175	/// the PHI are safe to speculate including all their transitive dependencies,
				176	/// and whether after speculation there will be some cost savings (profit) to
				177	/// folding the operands into the users of the PHI node. Returns true if both
				178	/// safe and profitable with relevant cost savings updated in the map and with
				179	/// an update to the `PotentialSpecSet`. Returns false if either safety or
				180	/// profitability are absent. Some new entries may be made to the
				181	/// `PotentialSpecSet` even when this routine returns false, but they remain
				182	/// conservatively correct.
				183	///
				184	/// The profitability check here is a local one, but it checks this in an
				185	/// interesting way. Beyond checking that the total cost of materializing the
				186	/// constants will be less than the cost of folding them into their users, it
				187	/// also checks that no one incoming constant will have a higher cost when
				188	/// folded into its users rather than materialized. This higher cost could
				189	/// result in a dynamic path that is more expensive even when the total cost
				190	/// is lower. Currently, all of the interesting cases where this optimization
				191	/// should fire are ones where it is a no-loss operation in this sense. If we
				192	/// ever want to be more aggressive here, we would need to balance the
				193	/// different incoming edges' cost by looking at their respective
				194	/// probabilities.
				195	static bool isSafeAndProfitableToSpeculateAroundPHI(
				196	PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
				197	SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
				198	SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT,
				199	TargetTransformInfo &TTI) {
				200	// First see whether there is any cost savings to speculating around this
				201	// PHI, and build up a map of the constant inputs to how many times they
				202	// occur.
				203	bool NonFreeMat = false;
				204	struct CostsAndCount {
				205	int MatCost = TargetTransformInfo::TCC_Free;
				206	int FoldedCost = TargetTransformInfo::TCC_Free;
				207	int Count = 0;
				208	};
				209	SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts;
				210	SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
				211	for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
				212	auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
				213	if (!IncomingC)
				214	continue;
				215
				216	// Only visit each incoming edge with a constant input once.
				217	if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
				218	continue;
				219
				220	auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
				221	// Count how many edges share a given incoming costant.
				222	++InsertResult.first->second.Count;
				223	// Only compute the cost the first time we see a particular constant.
				224	if (!InsertResult.second)
				225	continue;
				226
				227	int &MatCost = InsertResult.first->second.MatCost;
				228	MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
				229	NonFreeMat \|= MatCost != TTI.TCC_Free;
				230	}
				231	if (!NonFreeMat) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	232	LLVM_DEBUG(dbgs() << " Free: " << PN << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	233	// No profit in free materialization.
				234	return false;
				235	}
				236
				237	// Now check that the uses of this PHI can actually be speculated,
				238	// otherwise we'll still have to materialize the PHI value.
				239	if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	240	LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	241	return false;
				242	}
				243
				244	// Compute how much (if any) savings are available by speculating around this
				245	// PHI.
				246	for (Use &U : PN.uses()) {
				247	auto *UserI = cast<Instruction>(U.getUser());
				248	// Now check whether there is any savings to folding the incoming constants
				249	// into this use.
				250	unsigned Idx = U.getOperandNo();
				251
				252	// If we have a binary operator that is commutative, an actual constant
				253	// operand would end up on the RHS, so pretend the use of the PHI is on the
				254	// RHS.
				255	//
				256	// Technically, this is a bit weird if both operands are PHIs we're
				257	// speculating. But if that is the case, giving an "optimistic" cost isn't
				258	// a bad thing because after speculation it will constant fold. And
				259	// moreover, such cases should likely have been constant folded already by
				260	// some other pass, so we shouldn't worry about "modeling" them terribly
				261	// accurately here. Similarly, if the other operand is a constant, it still
				262	// seems fine to be "optimistic" in our cost modeling, because when the
				263	// incoming operand from the PHI node is also a constant, we will end up
				264	// constant folding.
				265	if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
				266	// Assume we will commute the constant to the RHS to be canonical.
				267	Idx = 1;
				268
Hiroshi Inoue	f209649	2018-06-14 05:41:49 +0000	[diff] [blame]	269	// Get the intrinsic ID if this user is an intrinsic.
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	270	Intrinsic::ID IID = Intrinsic::not_intrinsic;
				271	if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
				272	IID = UserII->getIntrinsicID();
				273
				274	for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
				275	ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
				276	int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
				277	int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
				278	if (IID)
				279	FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
				280	IncomingC->getType());
				281	else
				282	FoldedCost +=
				283	TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
				284	IncomingC->getType());
				285
				286	// If we accumulate more folded cost for this incoming constant than
				287	// materialized cost, then we'll regress any edge with this constant so
				288	// just bail. We're only interested in cases where folding the incoming
				289	// constants is at least break-even on all paths.
				290	if (FoldedCost > MatCost) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	291	LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC
				292	<< "\n"
				293	" Materializing cost: "
				294	<< MatCost
				295	<< "\n"
				296	" Accumulated folded cost: "
				297	<< FoldedCost << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	298	return false;
				299	}
				300	}
				301	}
				302
				303	// Compute the total cost savings afforded by this PHI node.
				304	int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
				305	for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
				306	int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
				307	int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
				308	int Count = IncomingConstantAndCostsAndCount.second.Count;
				309
				310	TotalMatCost += MatCost * Count;
				311	TotalFoldedCost += FoldedCost * Count;
				312	}
				313	assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
				314	"less that its materialized cost, "
				315	"the sum must be as well.");
				316
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	317	LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost)
				318	<< ": " << PN << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	319	CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
				320	return true;
				321	}
				322
				323	/// Simple helper to walk all the users of a list of phis depth first, and call
				324	/// a visit function on each one in post-order.
				325	///
				326	/// All of the PHIs should be in the same basic block, and this is primarily
				327	/// used to make a single depth-first walk across their collective users
				328	/// without revisiting any subgraphs. Callers should provide a fast, idempotent
				329	/// callable to test whether a node has been visited and the more important
				330	/// callable to actually visit a particular node.
				331	///
				332	/// Depth-first and postorder here refer to the operand graph -- we start
				333	/// from a collection of users of PHI nodes and walk "up" the operands
				334	/// depth-first.
				335	template <typename IsVisitedT, typename VisitT>
				336	static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs,
				337	IsVisitedT IsVisited,
				338	VisitT Visit) {
				339	SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
				340	for (auto *PN : PNs)
				341	for (Use &U : PN->uses()) {
				342	auto *UI = cast<Instruction>(U.getUser());
				343	if (IsVisited(UI))
				344	// Already visited this user, continue across the roots.
				345	continue;
				346
				347	// Otherwise, walk the operand graph depth-first and visit each
				348	// dependency in postorder.
				349	DFSStack.push_back({UI, UI->value_op_begin()});
				350	do {
				351	User::value_op_iterator OpIt;
				352	std::tie(UI, OpIt) = DFSStack.pop_back_val();
				353	while (OpIt != UI->value_op_end()) {
				354	auto OpI = dyn_cast<Instruction>(OpIt);
				355	// Increment to the next operand for whenever we continue.
				356	++OpIt;
				357	// No need to visit non-instructions, which can't form dependencies,
				358	// or instructions outside of our potential dependency set that we
				359	// were given. Finally, if we've already visited the node, continue
				360	// to the next.
				361	if (!OpI \|\| IsVisited(OpI))
				362	continue;
				363
				364	// Push onto the stack and descend. We can directly continue this
				365	// loop when ascending.
				366	DFSStack.push_back({UI, OpIt});
				367	UI = OpI;
				368	OpIt = OpI->value_op_begin();
				369	}
				370
				371	// Finished visiting children, visit this node.
				372	assert(!IsVisited(UI) && "Should not have already visited a node!");
				373	Visit(UI);
				374	} while (!DFSStack.empty());
				375	}
				376	}
				377
				378	/// Find profitable PHIs to speculate.
				379	///
				380	/// For a PHI node to be profitable, we need the cost of speculating its users
				381	/// (and their dependencies) to not exceed the savings of folding the PHI's
				382	/// constant operands into the speculated users.
				383	///
				384	/// Computing this is surprisingly challenging. Because users of two different
				385	/// PHI nodes can depend on each other or on common other instructions, it may
				386	/// be profitable to speculate two PHI nodes together even though neither one
				387	/// in isolation is profitable. The straightforward way to find all the
				388	/// profitable PHIs would be to check each combination of PHIs' cost, but this
				389	/// is exponential in complexity.
				390	///
				391	/// Even if we assume that we only care about cases where we can consider each
				392	/// PHI node in isolation (rather than considering cases where none are
				393	/// profitable in isolation but some subset are profitable as a set), we still
				394	/// have a challenge. The obvious way to find all individually profitable PHIs
				395	/// is to iterate until reaching a fixed point, but this will be quadratic in
				396	/// complexity. =/
				397	///
				398	/// This code currently uses a linear-to-compute order for a greedy approach.
				399	/// It won't find cases where a set of PHIs must be considered together, but it
				400	/// handles most cases of order dependence without quadratic iteration. The
				401	/// specific order used is the post-order across the operand DAG. When the last
				402	/// user of a PHI is visited in this postorder walk, we check it for
				403	/// profitability.
				404	///
				405	/// There is an orthogonal extra complexity to all of this: computing the cost
				406	/// itself can easily become a linear computation making everything again (at
				407	/// best) quadratic. Using a postorder over the operand graph makes it
				408	/// particularly easy to avoid this through dynamic programming. As we do the
				409	/// postorder walk, we build the transitive cost of that subgraph. It is also
				410	/// straightforward to then update these costs when we mark a PHI for
				411	/// speculation so that subsequent PHIs don't re-pay the cost of already
				412	/// speculated instructions.
				413	static SmallVector<PHINode *, 16>
				414	findProfitablePHIs(ArrayRef<PHINode *> PNs,
				415	const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
				416	const SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
				417	int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) {
				418	SmallVector<PHINode *, 16> SpecPNs;
				419
				420	// First, establish a reverse mapping from immediate users of the PHI nodes
				421	// to the nodes themselves, and count how many users each PHI node has in
				422	// a way we can update while processing them.
				423	SmallDenseMap<Instruction , TinyPtrVector<PHINode >, 16> UserToPNMap;
				424	SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
				425	SmallPtrSet<Instruction *, 16> UserSet;
				426	for (auto *PN : PNs) {
				427	assert(UserSet.empty() && "Must start with an empty user set!");
				428	for (Use &U : PN->uses())
				429	UserSet.insert(cast<Instruction>(U.getUser()));
				430	PNUserCountMap[PN] = UserSet.size();
				431	for (auto *UI : UserSet)
				432	UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
				433	UserSet.clear();
				434	}
				435
				436	// Now do a DFS across the operand graph of the users, computing cost as we
				437	// go and when all costs for a given PHI are known, checking that PHI for
				438	// profitability.
				439	SmallDenseMap<Instruction *, int, 16> SpecCostMap;
				440	visitPHIUsersAndDepsInPostOrder(
				441	PNs,
				442	/IsVisited/
				443	[&](Instruction *I) {
				444	// We consider anything that isn't potentially speculated to be
				445	// "visited" as it is already handled. Similarly, anything that is
				446	// potentially speculated but for which we have an entry in our cost
				447	// map, we're done.
				448	return !PotentialSpecSet.count(I) \|\| SpecCostMap.count(I);
				449	},
				450	/Visit/
				451	[&](Instruction *I) {
				452	// We've fully visited the operands, so sum their cost with this node
				453	// and update the cost map.
				454	int Cost = TTI.TCC_Free;
				455	for (Value *OpV : I->operand_values())
				456	if (auto *OpI = dyn_cast<Instruction>(OpV)) {
				457	auto CostMapIt = SpecCostMap.find(OpI);
				458	if (CostMapIt != SpecCostMap.end())
				459	Cost += CostMapIt->second;
				460	}
				461	Cost += TTI.getUserCost(I);
				462	bool Inserted = SpecCostMap.insert({I, Cost}).second;
				463	(void)Inserted;
				464	assert(Inserted && "Must not re-insert a cost during the DFS!");
				465
				466	// Now check if this node had a corresponding PHI node using it. If so,
				467	// we need to decrement the outstanding user count for it.
				468	auto UserPNsIt = UserToPNMap.find(I);
				469	if (UserPNsIt == UserToPNMap.end())
				470	return;
				471	auto &UserPNs = UserPNsIt->second;
				472	auto UserPNsSplitIt = std::stable_partition(
				473	UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
				474	int &PNUserCount = PNUserCountMap.find(UserPN)->second;
				475	assert(
				476	PNUserCount > 0 &&
				477	"Should never re-visit a PN after its user count hits zero!");
				478	--PNUserCount;
				479	return PNUserCount != 0;
				480	});
				481
				482	// FIXME: Rather than one at a time, we should sum the savings as the
				483	// cost will be completely shared.
				484	SmallVector<Instruction *, 16> SpecWorklist;
				485	for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
				486	int SpecCost = TTI.TCC_Free;
				487	for (Use &U : PN->uses())
				488	SpecCost +=
				489	SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
				490	SpecCost *= (NumPreds - 1);
				491	// When the user count of a PHI node hits zero, we should check its
				492	// profitability. If profitable, we should mark it for speculation
				493	// and zero out the cost of everything it depends on.
				494	int CostSavings = CostSavingsMap.find(PN)->second;
				495	if (SpecCost > CostSavings) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	496	LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN
				497	<< "\n"
				498	" Cost savings: "
				499	<< CostSavings
				500	<< "\n"
				501	" Speculation cost: "
				502	<< SpecCost << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	503	continue;
				504	}
				505
				506	// We're going to speculate this user-associated PHI. Copy it out and
				507	// add its users to the worklist to update their cost.
				508	SpecPNs.push_back(PN);
				509	for (Use &U : PN->uses()) {
				510	auto *UI = cast<Instruction>(U.getUser());
				511	auto CostMapIt = SpecCostMap.find(UI);
				512	if (CostMapIt->second == 0)
				513	continue;
				514	// Zero out this cost entry to avoid duplicates.
				515	CostMapIt->second = 0;
				516	SpecWorklist.push_back(UI);
				517	}
				518	}
				519
				520	// Now walk all the operands of the users in the worklist transitively
				521	// to zero out all the memoized costs.
				522	while (!SpecWorklist.empty()) {
				523	Instruction *SpecI = SpecWorklist.pop_back_val();
				524	assert(SpecCostMap.find(SpecI)->second == 0 &&
				525	"Didn't zero out a cost!");
				526
				527	// Walk the operands recursively to zero out their cost as well.
				528	for (auto *OpV : SpecI->operand_values()) {
				529	auto *OpI = dyn_cast<Instruction>(OpV);
				530	if (!OpI)
				531	continue;
				532	auto CostMapIt = SpecCostMap.find(OpI);
				533	if (CostMapIt == SpecCostMap.end() \|\| CostMapIt->second == 0)
				534	continue;
				535	CostMapIt->second = 0;
				536	SpecWorklist.push_back(OpI);
				537	}
				538	}
				539	});
				540
				541	return SpecPNs;
				542	}
				543
				544	/// Speculate users around a set of PHI nodes.
				545	///
				546	/// This routine does the actual speculation around a set of PHI nodes where we
				547	/// have determined this to be both safe and profitable.
				548	///
				549	/// This routine handles any spliting of critical edges necessary to create
				550	/// a safe block to speculate into as well as cloning the instructions and
				551	/// rewriting all uses.
				552	static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
				553	SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
				554	SmallSetVector<BasicBlock *, 16> &PredSet,
				555	DominatorTree &DT) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	556	LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	557	NumPHIsSpeculated += SpecPNs.size();
				558
				559	// Split any critical edges so that we have a block to hoist into.
				560	auto *ParentBB = SpecPNs[0]->getParent();
				561	SmallVector<BasicBlock *, 16> SpecPreds;
				562	SpecPreds.reserve(PredSet.size());
				563	for (auto *PredBB : PredSet) {
				564	auto *NewPredBB = SplitCriticalEdge(
				565	PredBB, ParentBB,
				566	CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
				567	if (NewPredBB) {
				568	++NumEdgesSplit;
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	569	LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName()
				570	<< "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	571	SpecPreds.push_back(NewPredBB);
				572	} else {
				573	assert(PredBB->getSingleSuccessor() == ParentBB &&
				574	"We need a non-critical predecessor to speculate into.");
				575	assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
				576	"Cannot have a non-critical invoke!");
				577
				578	// Already non-critical, use existing pred.
				579	SpecPreds.push_back(PredBB);
				580	}
				581	}
				582
				583	SmallPtrSet<Instruction *, 16> SpecSet;
				584	SmallVector<Instruction *, 16> SpecList;
				585	visitPHIUsersAndDepsInPostOrder(SpecPNs,
				586	/IsVisited/
				587	[&](Instruction *I) {
				588	// This is visited if we don't need to
				589	// speculate it or we already have
				590	// speculated it.
				591	return !PotentialSpecSet.count(I) \|\|
				592	SpecSet.count(I);
				593	},
				594	/Visit/
				595	[&](Instruction *I) {
				596	// All operands scheduled, schedule this
				597	// node.
				598	SpecSet.insert(I);
				599	SpecList.push_back(I);
				600	});
				601
				602	int NumSpecInsts = SpecList.size() * SpecPreds.size();
				603	int NumRedundantInsts = NumSpecInsts - SpecList.size();
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	604	LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts
				605	<< " speculated instructions, " << NumRedundantInsts
				606	<< " redundancies\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	607	NumSpeculatedInstructions += NumSpecInsts;
				608	NumNewRedundantInstructions += NumRedundantInsts;
				609
				610	// Each predecessor is numbered by its index in `SpecPreds`, so for each
				611	// instruction we speculate, the speculated instruction is stored in that
Hiroshi Inoue	f209649	2018-06-14 05:41:49 +0000	[diff] [blame]	612	// index of the vector associated with the original instruction. We also
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	613	// store the incoming values for each predecessor from any PHIs used.
				614	SmallDenseMap<Instruction , SmallVector<Value , 2>, 16> SpeculatedValueMap;
				615
				616	// Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
				617	// value. This handles both the PHIs we are speculating around and any other
				618	// PHIs that happen to be used.
				619	for (auto *OrigI : SpecList)
				620	for (auto *OpV : OrigI->operand_values()) {
				621	auto *OpPN = dyn_cast<PHINode>(OpV);
				622	if (!OpPN \|\| OpPN->getParent() != ParentBB)
				623	continue;
				624
				625	auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
				626	if (!InsertResult.second)
				627	continue;
				628
				629	auto &SpeculatedVals = InsertResult.first->second;
				630
				631	// Populating our structure for mapping is particularly annoying because
				632	// finding an incoming value for a particular predecessor block in a PHI
				633	// node is a linear time operation! To avoid quadratic behavior, we build
				634	// a map for this PHI node's incoming values and then translate it into
				635	// the more compact representation used below.
				636	SmallDenseMap<BasicBlock , Value , 16> IncomingValueMap;
				637	for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
				638	IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
				639
				640	for (auto *PredBB : SpecPreds)
				641	SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
				642	}
				643
				644	// Speculate into each predecessor.
				645	for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
				646	auto *PredBB = SpecPreds[PredIdx];
				647	assert(PredBB->getSingleSuccessor() == ParentBB &&
				648	"We need a non-critical predecessor to speculate into.");
				649
				650	for (auto *OrigI : SpecList) {
				651	auto *NewI = OrigI->clone();
				652	NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
				653	NewI->insertBefore(PredBB->getTerminator());
				654
				655	// Rewrite all the operands to the previously speculated instructions.
				656	// Because we're walking in-order, the defs must precede the uses and we
				657	// should already have these mappings.
				658	for (Use &U : NewI->operands()) {
				659	auto *OpI = dyn_cast<Instruction>(U.get());
				660	if (!OpI)
				661	continue;
				662	auto MapIt = SpeculatedValueMap.find(OpI);
				663	if (MapIt == SpeculatedValueMap.end())
				664	continue;
				665	const auto &SpeculatedVals = MapIt->second;
				666	assert(SpeculatedVals[PredIdx] &&
				667	"Must have a speculated value for this predecessor!");
				668	assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
				669	"Speculated value has the wrong type!");
				670
				671	// Rewrite the use to this predecessor's speculated instruction.
				672	U.set(SpeculatedVals[PredIdx]);
				673	}
				674
				675	// Commute instructions which now have a constant in the LHS but not the
				676	// RHS.
				677	if (NewI->isBinaryOp() && NewI->isCommutative() &&
				678	isa<Constant>(NewI->getOperand(0)) &&
				679	!isa<Constant>(NewI->getOperand(1)))
				680	NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
				681
				682	SpeculatedValueMap[OrigI].push_back(NewI);
				683	assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
				684	"Mismatched speculated instruction index!");
				685	}
				686	}
				687
				688	// Walk the speculated instruction list and if they have uses, insert a PHI
				689	// for them from the speculated versions, and replace the uses with the PHI.
				690	// Then erase the instructions as they have been fully speculated. The walk
				691	// needs to be in reverse so that we don't think there are users when we'll
				692	// actually eventually remove them later.
				693	IRBuilder<> IRB(SpecPNs[0]);
				694	for (auto *OrigI : llvm::reverse(SpecList)) {
				695	// Check if we need a PHI for any remaining users and if so, insert it.
				696	if (!OrigI->use_empty()) {
				697	auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
				698	Twine(OrigI->getName()) + ".phi");
				699	// Add the incoming values we speculated.
				700	auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
				701	for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
				702	SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
				703
				704	// And replace the uses with the PHI node.
				705	OrigI->replaceAllUsesWith(SpecIPN);
				706	}
				707
				708	// It is important to immediately erase this so that it stops using other
				709	// instructions. This avoids inserting needless PHIs of them.
				710	OrigI->eraseFromParent();
				711	}
				712
				713	// All of the uses of the speculated phi nodes should be removed at this
				714	// point, so erase them.
				715	for (auto *SpecPN : SpecPNs) {
				716	assert(SpecPN->use_empty() && "All users should have been speculated!");
				717	SpecPN->eraseFromParent();
				718	}
				719	}
				720
				721	/// Try to speculate around a series of PHIs from a single basic block.
				722	///
				723	/// This routine checks whether any of these PHIs are profitable to speculate
				724	/// users around. If safe and profitable, it does the speculation. It returns
				725	/// true when at least some speculation occurs.
				726	static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
				727	DominatorTree &DT, TargetTransformInfo &TTI) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	728	LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	729
				730	// Savings in cost from speculating around a PHI node.
				731	SmallDenseMap<PHINode *, int, 16> CostSavingsMap;
				732
				733	// Remember the set of instructions that are candidates for speculation so
				734	// that we can quickly walk things within that space. This prunes out
				735	// instructions already available along edges, etc.
				736	SmallPtrSet<Instruction *, 16> PotentialSpecSet;
				737
				738	// Remember the set of instructions that are (transitively) unsafe to
				739	// speculate into the incoming edges of this basic block. This avoids
				740	// recomputing them for each PHI node we check. This set is specific to this
				741	// block though as things are pruned out of it based on what is available
				742	// along incoming edges.
				743	SmallPtrSet<Instruction *, 16> UnsafeSet;
				744
				745	// For each PHI node in this block, check whether there are immediate folding
				746	// opportunities from speculation, and whether that speculation will be
				747	// valid. This determise the set of safe PHIs to speculate.
				748	PNs.erase(llvm::remove_if(PNs,
				749	[&](PHINode *PN) {
				750	return !isSafeAndProfitableToSpeculateAroundPHI(
				751	*PN, CostSavingsMap, PotentialSpecSet,
				752	UnsafeSet, DT, TTI);
				753	}),
				754	PNs.end());
				755	// If no PHIs were profitable, skip.
				756	if (PNs.empty()) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	757	LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	758	return false;
				759	}
				760
				761	// We need to know how much speculation will cost which is determined by how
				762	// many incoming edges will need a copy of each speculated instruction.
				763	SmallSetVector<BasicBlock *, 16> PredSet;
				764	for (auto *PredBB : PNs[0]->blocks()) {
				765	if (!PredSet.insert(PredBB))
				766	continue;
				767
				768	// We cannot speculate when a predecessor is an indirect branch.
				769	// FIXME: We also can't reliably create a non-critical edge block for
				770	// speculation if the predecessor is an invoke. This doesn't seem
				771	// fundamental and we should probably be splitting critical edges
				772	// differently.
				773	if (isa<IndirectBrInst>(PredBB->getTerminator()) \|\|
				774	isa<InvokeInst>(PredBB->getTerminator())) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	775	LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
				776	<< PredBB->getName() << "\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	777	return false;
				778	}
				779	}
				780	if (PredSet.size() < 2) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	781	LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n");
Chandler Carruth	c34f789	2017-11-28 11:32:31 +0000	[diff] [blame]	782	return false;
				783	}
				784
				785	SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs(
				786	PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
				787	if (SpecPNs.empty())
				788	// Nothing to do.
				789	return false;
				790
				791	speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
				792	return true;
				793	}
				794
				795	PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F,
				796	FunctionAnalysisManager &AM) {
				797	auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
				798	auto &TTI = AM.getResult<TargetIRAnalysis>(F);
				799
				800	bool Changed = false;
				801	for (auto BB : ReversePostOrderTraversal<Function >(&F)) {
				802	SmallVector<PHINode *, 16> PNs;
				803	auto BBI = BB->begin();
				804	while (auto PN = dyn_cast<PHINode>(&BBI)) {
				805	PNs.push_back(PN);
				806	++BBI;
				807	}
				808
				809	if (PNs.empty())
				810	continue;
				811
				812	Changed \|= tryToSpeculatePHIs(PNs, DT, TTI);
				813	}
				814
				815	if (!Changed)
				816	return PreservedAnalyses::all();
				817
				818	PreservedAnalyses PA;
				819	return PA;
				820	}