Blame - llvm/lib/Analysis/SyncDependenceAnalysis.cpp - toolchain/llvm-project

blob: 54b4bb3113baf5bc0e86870e20340ffc4f712db4 [file] [log] [blame]

Nicolai Haehnle	5904168	2018-10-18 09:38:44 +0000	[diff] [blame]	1	//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
				2	//--===//
				3	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	4	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				5	// See https://llvm.org/LICENSE.txt for license information.
				6	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Nicolai Haehnle	5904168	2018-10-18 09:38:44 +0000	[diff] [blame]	7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements an algorithm that returns for a divergent branch
				11	// the set of basic blocks whose phi nodes become divergent due to divergent
				12	// control. These are the blocks that are reachable by two disjoint paths from
				13	// the branch or loop exits that have a reaching path that is disjoint from a
				14	// path to the loop latch.
				15	//
				16	// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
				17	// control-induced divergence in phi nodes.
				18	//
				19	// -- Summary --
				20	// The SyncDependenceAnalysis lazily computes sync dependences [3].
				21	// The analysis evaluates the disjoint path criterion [2] by a reduction
				22	// to SSA construction. The SSA construction algorithm is implemented as
				23	// a simple data-flow analysis [1].
				24	//
				25	// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
				26	// [2] "Efficiently Computing Static Single Assignment Form
				27	// and the Control Dependence Graph", TOPLAS '91,
				28	// Cytron, Ferrante, Rosen, Wegman and Zadeck
				29	// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
				30	// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
				31	//
				32	// -- Sync dependence --
				33	// Sync dependence [4] characterizes the control flow aspect of the
				34	// propagation of branch divergence. For example,
				35	//
				36	// %cond = icmp slt i32 %tid, 10
				37	// br i1 %cond, label %then, label %else
				38	// then:
				39	// br label %merge
				40	// else:
				41	// br label %merge
				42	// merge:
				43	// %a = phi i32 [ 0, %then ], [ 1, %else ]
				44	//
				45	// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
				46	// because %tid is not on its use-def chains, %a is sync dependent on %tid
				47	// because the branch "br i1 %cond" depends on %tid and affects which value %a
				48	// is assigned to.
				49	//
				50	// -- Reduction to SSA construction --
				51	// There are two disjoint paths from A to X, if a certain variant of SSA
				52	// construction places a phi node in X under the following set-up scheme [2].
				53	//
				54	// This variant of SSA construction ignores incoming undef values.
				55	// That is paths from the entry without a definition do not result in
				56	// phi nodes.
				57	//
				58	// entry
				59	// / \
				60	// A \
				61	// / \ Y
				62	// B C /
				63	// \ / \ /
				64	// D E
				65	// \ /
				66	// F
				67	// Assume that A contains a divergent branch. We are interested
				68	// in the set of all blocks where each block is reachable from A
				69	// via two disjoint paths. This would be the set {D, F} in this
				70	// case.
				71	// To generally reduce this query to SSA construction we introduce
				72	// a virtual variable x and assign to x different values in each
				73	// successor block of A.
				74	// entry
				75	// / \
				76	// A \
				77	// / \ Y
				78	// x = 0 x = 1 /
				79	// \ / \ /
				80	// D E
				81	// \ /
				82	// F
				83	// Our flavor of SSA construction for x will construct the following
				84	// entry
				85	// / \
				86	// A \
				87	// / \ Y
				88	// x0 = 0 x1 = 1 /
				89	// \ / \ /
				90	// x2=phi E
				91	// \ /
				92	// x3=phi
				93	// The blocks D and F contain phi nodes and are thus each reachable
				94	// by two disjoins paths from A.
				95	//
				96	// -- Remarks --
				97	// In case of loop exits we need to check the disjoint path criterion for loops
				98	// [2]. To this end, we check whether the definition of x differs between the
				99	// loop exit and the loop header (_after_ SSA construction).
				100	//
				101	//===----------------------------------------------------------------------===//
				102	#include "llvm/ADT/PostOrderIterator.h"
				103	#include "llvm/ADT/SmallPtrSet.h"
				104	#include "llvm/Analysis/PostDominators.h"
				105	#include "llvm/Analysis/SyncDependenceAnalysis.h"
				106	#include "llvm/IR/BasicBlock.h"
				107	#include "llvm/IR/CFG.h"
				108	#include "llvm/IR/Dominators.h"
				109	#include "llvm/IR/Function.h"
				110
				111	#include <stack>
				112	#include <unordered_set>
				113
				114	#define DEBUG_TYPE "sync-dependence"
				115
				116	namespace llvm {
				117
				118	ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
				119
				120	SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
				121	const PostDominatorTree &PDT,
				122	const LoopInfo &LI)
				123	: FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
				124
				125	SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
				126
				127	using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
				128
				129	// divergence propagator for reducible CFGs
				130	struct DivergencePropagator {
				131	const FunctionRPOT &FuncRPOT;
				132	const DominatorTree &DT;
				133	const PostDominatorTree &PDT;
				134	const LoopInfo &LI;
				135
				136	// identified join points
				137	std::unique_ptr<ConstBlockSet> JoinBlocks;
				138
				139	// reached loop exits (by a path disjoint to a path to the loop header)
				140	SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
				141
				142	// if DefMap[B] == C then C is the dominating definition at block B
				143	// if DefMap[B] ~ undef then we haven't seen B yet
				144	// if DefMap[B] == B then B is a join point of disjoint paths from X or B is
				145	// an immediate successor of X (initial value).
				146	using DefiningBlockMap = std::map<const BasicBlock , const BasicBlock >;
				147	DefiningBlockMap DefMap;
				148
				149	// all blocks with pending visits
				150	std::unordered_set<const BasicBlock *> PendingUpdates;
				151
				152	DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
				153	const PostDominatorTree &PDT, const LoopInfo &LI)
				154	: FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
				155	JoinBlocks(new ConstBlockSet) {}
				156
				157	// set the definition at @block and mark @block as pending for a visit
				158	void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
				159	bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
				160	if (WasAdded)
				161	PendingUpdates.insert(&Block);
				162	}
				163
				164	void printDefs(raw_ostream &Out) {
				165	Out << "Propagator::DefMap {\n";
				166	for (const auto *Block : FuncRPOT) {
				167	auto It = DefMap.find(Block);
				168	Out << Block->getName() << " : ";
				169	if (It == DefMap.end()) {
				170	Out << "\n";
				171	} else {
				172	const auto *DefBlock = It->second;
				173	Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
				174	}
				175	}
				176	Out << "}\n";
				177	}
				178
				179	// process @succBlock with reaching definition @defBlock
				180	// the original divergent branch was in @parentLoop (if any)
				181	void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
				182	const BasicBlock &DefBlock) {
				183
				184	// @succBlock is a loop exit
				185	if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
				186	DefMap.emplace(&SuccBlock, &DefBlock);
				187	ReachedLoopExits.insert(&SuccBlock);
				188	return;
				189	}
				190
				191	// first reaching def?
				192	auto ItLastDef = DefMap.find(&SuccBlock);
				193	if (ItLastDef == DefMap.end()) {
				194	addPending(SuccBlock, DefBlock);
				195	return;
				196	}
				197
				198	// a join of at least two definitions
				199	if (ItLastDef->second != &DefBlock) {
				200	// do we know this join already?
				201	if (!JoinBlocks->insert(&SuccBlock).second)
				202	return;
				203
				204	// update the definition
				205	addPending(SuccBlock, SuccBlock);
				206	}
				207	}
				208
				209	// find all blocks reachable by two disjoint paths from @rootTerm.
Chandler Carruth	608e6fa	2018-10-19 00:22:10 +0000	[diff] [blame]	210	// This method works for both divergent terminators and loops with
Nicolai Haehnle	5904168	2018-10-18 09:38:44 +0000	[diff] [blame]	211	// divergent exits.
				212	// @rootBlock is either the block containing the branch or the header of the
				213	// divergent loop.
				214	// @nodeSuccessors is the set of successors of the node (Loop or Terminator)
				215	// headed by @rootBlock.
				216	// @parentLoop is the parent loop of the Loop or the loop that contains the
				217	// Terminator.
				218	template <typename SuccessorIterable>
				219	std::unique_ptr<ConstBlockSet>
				220	computeJoinPoints(const BasicBlock &RootBlock,
				221	SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
				222	assert(JoinBlocks);
				223
				224	// immediate post dominator (no join block beyond that block)
				225	const auto PdNode = PDT.getNode(const_cast<BasicBlock >(&RootBlock));
				226	const auto *IpdNode = PdNode->getIDom();
				227	const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
				228
				229	// bootstrap with branch targets
				230	for (const auto *SuccBlock : NodeSuccessors) {
				231	DefMap.emplace(SuccBlock, SuccBlock);
				232
				233	if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
				234	// immediate loop exit from node.
				235	ReachedLoopExits.insert(SuccBlock);
				236	continue;
				237	} else {
				238	// regular successor
				239	PendingUpdates.insert(SuccBlock);
				240	}
				241	}
				242
				243	auto ItBeginRPO = FuncRPOT.begin();
				244
				245	// skip until term (TODO RPOT won't let us start at @term directly)
				246	for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
				247
				248	auto ItEndRPO = FuncRPOT.end();
				249	assert(ItBeginRPO != ItEndRPO);
				250
				251	// propagate definitions at the immediate successors of the node in RPO
				252	auto ItBlockRPO = ItBeginRPO;
				253	while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
				254	const auto Block = ItBlockRPO;
				255
				256	// skip @block if not pending update
				257	auto ItPending = PendingUpdates.find(Block);
				258	if (ItPending == PendingUpdates.end())
				259	continue;
				260	PendingUpdates.erase(ItPending);
				261
				262	// propagate definition at @block to its successors
				263	auto ItDef = DefMap.find(Block);
				264	const auto *DefBlock = ItDef->second;
				265	assert(DefBlock);
				266
				267	auto *BlockLoop = LI.getLoopFor(Block);
				268	if (ParentLoop &&
				269	(ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
				270	// if the successor is the header of a nested loop pretend its a
				271	// single node with the loop's exits as successors
				272	SmallVector<BasicBlock *, 4> BlockLoopExits;
				273	BlockLoop->getExitBlocks(BlockLoopExits);
				274	for (const auto *BlockLoopExit : BlockLoopExits) {
				275	visitSuccessor(BlockLoopExit, ParentLoop, DefBlock);
				276	}
				277
				278	} else {
				279	// the successors are either on the same loop level or loop exits
				280	for (const auto *SuccBlock : successors(Block)) {
				281	visitSuccessor(SuccBlock, ParentLoop, DefBlock);
				282	}
				283	}
				284	}
				285
				286	// We need to know the definition at the parent loop header to decide
				287	// whether the definition at the header is different from the definition at
				288	// the loop exits, which would indicate a divergent loop exits.
				289	//
				290	// A // loop header
				291	// \|
				292	// B // nested loop header
				293	// \|
				294	// C -> X (exit from B loop) -..-> (A latch)
				295	// \|
				296	// D -> back to B (B latch)
				297	// \|
				298	// proper exit from both loops
				299	//
				300	// D post-dominates B as it is the only proper exit from the "A loop".
				301	// If C has a divergent branch, propagation will therefore stop at D.
				302	// That implies that B will never receive a definition.
				303	// But that definition can only be the same as at D (D itself in thise case)
				304	// because all paths to anywhere have to pass through D.
				305	//
				306	const BasicBlock *ParentLoopHeader =
				307	ParentLoop ? ParentLoop->getHeader() : nullptr;
				308	if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
				309	DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
				310	}
				311
				312	// analyze reached loop exits
				313	if (!ReachedLoopExits.empty()) {
				314	assert(ParentLoop);
				315	const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
				316	LLVM_DEBUG(printDefs(dbgs()));
				317	assert(HeaderDefBlock && "no definition in header of carrying loop");
				318
				319	for (const auto *ExitBlock : ReachedLoopExits) {
				320	auto ItExitDef = DefMap.find(ExitBlock);
				321	assert((ItExitDef != DefMap.end()) &&
				322	"no reaching def at reachable loop exit");
				323	if (ItExitDef->second != HeaderDefBlock) {
				324	JoinBlocks->insert(ExitBlock);
				325	}
				326	}
				327	}
				328
				329	return std::move(JoinBlocks);
				330	}
				331	};
				332
				333	const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
				334	using LoopExitVec = SmallVector<BasicBlock *, 4>;
				335	LoopExitVec LoopExits;
				336	Loop.getExitBlocks(LoopExits);
				337	if (LoopExits.size() < 1) {
				338	return EmptyBlockSet;
				339	}
				340
				341	// already available in cache?
				342	auto ItCached = CachedLoopExitJoins.find(&Loop);
				343	if (ItCached != CachedLoopExitJoins.end())
				344	return *ItCached->second;
				345
				346	// compute all join points
				347	DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
				348	auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
				349	*Loop.getHeader(), LoopExits, Loop.getParentLoop());
				350
				351	auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
				352	assert(ItInserted.second);
				353	return *ItInserted.first->second;
				354	}
				355
				356	const ConstBlockSet &
Chandler Carruth	608e6fa	2018-10-19 00:22:10 +0000	[diff] [blame]	357	SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
Nicolai Haehnle	5904168	2018-10-18 09:38:44 +0000	[diff] [blame]	358	// trivial case
				359	if (Term.getNumSuccessors() < 1) {
				360	return EmptyBlockSet;
				361	}
				362
				363	// already available in cache?
				364	auto ItCached = CachedBranchJoins.find(&Term);
				365	if (ItCached != CachedBranchJoins.end())
				366	return *ItCached->second;
				367
				368	// compute all join points
				369	DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
				370	const auto &TermBlock = *Term.getParent();
				371	auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
				372	TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
				373
				374	auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
				375	assert(ItInserted.second);
				376	return *ItInserted.first->second;
				377	}
				378
				379	} // namespace llvm