Blame - llvm/lib/Analysis/DivergenceAnalysis.cpp - toolchain/llvm-project

blob: 93a288858ba8f02bd654d2ec2d175c097b402b49 [file] [log] [blame]

Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	1	//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	10	// This file implements divergence analysis which determines whether a branch
				11	// in a GPU program is divergent.It can help branch optimizations such as jump
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	12	// threading and loop unswitching to make better decisions.
				13	//
				14	// GPU programs typically use the SIMD execution model, where multiple threads
				15	// in the same execution group have to execute in lock-step. Therefore, if the
				16	// code contains divergent branches (i.e., threads in a group do not agree on
				17	// which path of the branch to take), the group of threads has to execute all
				18	// the paths from that branch with different subsets of threads enabled until
				19	// they converge at the immediately post-dominating BB of the paths.
				20	//
				21	// Due to this execution model, some optimizations such as jump
				22	// threading and loop unswitching can be unfortunately harmful when performed on
				23	// divergent branches. Therefore, an analysis that computes which branches in a
				24	// GPU program are divergent can help the compiler to selectively run these
				25	// optimizations.
				26	//
				27	// This file defines divergence analysis which computes a conservative but
				28	// non-trivial approximation of all divergent branches in a GPU program. It
				29	// partially implements the approach described in
				30	//
				31	// Divergence Analysis
				32	// Sampaio, Souza, Collange, Pereira
				33	// TOPLAS '13
				34	//
				35	// The divergence analysis identifies the sources of divergence (e.g., special
				36	// variables that hold the thread ID), and recursively marks variables that are
				37	// data or sync dependent on a source of divergence as divergent.
				38	//
				39	// While data dependency is a well-known concept, the notion of sync dependency
				40	// is worth more explanation. Sync dependence characterizes the control flow
				41	// aspect of the propagation of branch divergence. For example,
				42	//
				43	// %cond = icmp slt i32 %tid, 10
				44	// br i1 %cond, label %then, label %else
				45	// then:
				46	// br label %merge
				47	// else:
				48	// br label %merge
				49	// merge:
				50	// %a = phi i32 [ 0, %then ], [ 1, %else ]
				51	//
				52	// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
				53	// because %tid is not on its use-def chains, %a is sync dependent on %tid
				54	// because the branch "br i1 %cond" depends on %tid and affects which value %a
				55	// is assigned to.
				56	//
				57	// The current implementation has the following limitations:
				58	// 1. intra-procedural. It conservatively considers the arguments of a
				59	// non-kernel-entry function and the return value of a function call as
				60	// divergent.
				61	// 2. memory as black box. It conservatively considers values loaded from
				62	// generic or local address as divergent. This can be improved by leveraging
				63	// pointer analysis.
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	64	//
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	65	//===----------------------------------------------------------------------===//
				66
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	67	#include "llvm/Analysis/DivergenceAnalysis.h"
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	68	#include "llvm/Analysis/Passes.h"
				69	#include "llvm/Analysis/PostDominators.h"
				70	#include "llvm/Analysis/TargetTransformInfo.h"
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	71	#include "llvm/IR/Dominators.h"
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	72	#include "llvm/IR/InstIterator.h"
				73	#include "llvm/IR/Instructions.h"
				74	#include "llvm/IR/IntrinsicInst.h"
				75	#include "llvm/IR/Value.h"
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	76	#include "llvm/Support/CommandLine.h"
				77	#include "llvm/Support/Debug.h"
				78	#include "llvm/Support/raw_ostream.h"
				79	#include "llvm/Transforms/Scalar.h"
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	80	#include <vector>
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	81	using namespace llvm;
				82
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	83	namespace {
				84
				85	class DivergencePropagator {
				86	public:
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	87	DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
				88	PostDominatorTree &PDT, DenseSet<const Value *> &DV)
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	89	: F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
				90	void populateWithSourcesOfDivergence();
				91	void propagate();
				92
				93	private:
				94	// A helper function that explores data dependents of V.
				95	void exploreDataDependency(Value *V);
				96	// A helper function that explores sync dependents of TI.
				97	void exploreSyncDependency(TerminatorInst *TI);
				98	// Computes the influence region from Start to End. This region includes all
				99	// basic blocks on any path from Start to End.
				100	void computeInfluenceRegion(BasicBlock Start, BasicBlock End,
				101	DenseSet<BasicBlock *> &InfluenceRegion);
				102	// Finds all users of I that are outside the influence region, and add these
				103	// users to Worklist.
				104	void findUsersOutsideInfluenceRegion(
				105	Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
				106
				107	Function &F;
				108	TargetTransformInfo &TTI;
				109	DominatorTree &DT;
				110	PostDominatorTree &PDT;
				111	std::vector<Value *> Worklist; // Stack for DFS.
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	112	DenseSet<const Value *> &DV; // Stores all divergent values.
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	113	};
				114
				115	void DivergencePropagator::populateWithSourcesOfDivergence() {
				116	Worklist.clear();
				117	DV.clear();
Nico Rieck	7819951	2015-08-06 19:10:45 +0000	[diff] [blame]	118	for (auto &I : instructions(F)) {
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	119	if (TTI.isSourceOfDivergence(&I)) {
				120	Worklist.push_back(&I);
				121	DV.insert(&I);
				122	}
				123	}
				124	for (auto &Arg : F.args()) {
				125	if (TTI.isSourceOfDivergence(&Arg)) {
				126	Worklist.push_back(&Arg);
				127	DV.insert(&Arg);
				128	}
				129	}
				130	}
				131
				132	void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
				133	// Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
				134	// immediate post dominator are divergent. This rule handles if-then-else
				135	// patterns. For example,
				136	//
				137	// if (tid < 5)
				138	// a1 = 1;
				139	// else
				140	// a2 = 2;
				141	// a = phi(a1, a2); // sync dependent on (tid < 5)
				142	BasicBlock *ThisBB = TI->getParent();
				143	BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock();
				144	if (IPostDom == nullptr)
				145	return;
				146
				147	for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
				148	// A PHINode is uniform if it returns the same value no matter which path is
				149	// taken.
Duncan P. N. Exon Smith	5a82c91	2015-10-10 00:53:03 +0000	[diff] [blame^]	150	if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(&*I).second)
				151	Worklist.push_back(&*I);
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	152	}
				153
				154	// Propagation rule 2: if a value defined in a loop is used outside, the user
				155	// is sync dependent on the condition of the loop exits that dominate the
				156	// user. For example,
				157	//
				158	// int i = 0;
				159	// do {
				160	// i++;
				161	// if (foo(i)) ... // uniform
				162	// } while (i < tid);
				163	// if (bar(i)) ... // divergent
				164	//
				165	// A program may contain unstructured loops. Therefore, we cannot leverage
				166	// LoopInfo, which only recognizes natural loops.
				167	//
				168	// The algorithm used here handles both natural and unstructured loops. Given
				169	// a branch TI, we first compute its influence region, the union of all simple
				170	// paths from TI to its immediate post dominator (IPostDom). Then, we search
				171	// for all the values defined in the influence region but used outside. All
				172	// these users are sync dependent on TI.
				173	DenseSet<BasicBlock *> InfluenceRegion;
				174	computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
				175	// An insight that can speed up the search process is that all the in-region
				176	// values that are used outside must dominate TI. Therefore, instead of
				177	// searching every basic blocks in the influence region, we search all the
				178	// dominators of TI until it is outside the influence region.
				179	BasicBlock *InfluencedBB = ThisBB;
				180	while (InfluenceRegion.count(InfluencedBB)) {
				181	for (auto &I : *InfluencedBB)
				182	findUsersOutsideInfluenceRegion(I, InfluenceRegion);
				183	DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
				184	if (IDomNode == nullptr)
				185	break;
				186	InfluencedBB = IDomNode->getBlock();
				187	}
				188	}
				189
				190	void DivergencePropagator::findUsersOutsideInfluenceRegion(
				191	Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
				192	for (User *U : I.users()) {
				193	Instruction *UserInst = cast<Instruction>(U);
				194	if (!InfluenceRegion.count(UserInst->getParent())) {
				195	if (DV.insert(UserInst).second)
				196	Worklist.push_back(UserInst);
				197	}
				198	}
				199	}
				200
				201	void DivergencePropagator::computeInfluenceRegion(
				202	BasicBlock Start, BasicBlock End,
				203	DenseSet<BasicBlock *> &InfluenceRegion) {
				204	assert(PDT.properlyDominates(End, Start) &&
				205	"End does not properly dominate Start");
				206	std::vector<BasicBlock *> InfluenceStack;
				207	InfluenceStack.push_back(Start);
				208	InfluenceRegion.insert(Start);
				209	while (!InfluenceStack.empty()) {
				210	BasicBlock *BB = InfluenceStack.back();
				211	InfluenceStack.pop_back();
				212	for (BasicBlock *Succ : successors(BB)) {
				213	if (End != Succ && InfluenceRegion.insert(Succ).second)
				214	InfluenceStack.push_back(Succ);
				215	}
				216	}
				217	}
				218
				219	void DivergencePropagator::exploreDataDependency(Value *V) {
				220	// Follow def-use chains of V.
				221	for (User *U : V->users()) {
				222	Instruction *UserInst = cast<Instruction>(U);
				223	if (DV.insert(UserInst).second)
				224	Worklist.push_back(UserInst);
				225	}
				226	}
				227
				228	void DivergencePropagator::propagate() {
				229	// Traverse the dependency graph using DFS.
				230	while (!Worklist.empty()) {
				231	Value *V = Worklist.back();
				232	Worklist.pop_back();
				233	if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
				234	// Terminators with less than two successors won't introduce sync
				235	// dependency. Ignore them.
				236	if (TI->getNumSuccessors() > 1)
				237	exploreSyncDependency(TI);
				238	}
				239	exploreDataDependency(V);
				240	}
				241	}
				242
Alexander Kornienko	f00654e	2015-06-23 09:49:53 +0000	[diff] [blame]	243	} /// end namespace anonymous
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	244
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	245	// Register this pass.
				246	char DivergenceAnalysis::ID = 0;
				247	INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
				248	false, true)
				249	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
				250	INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
				251	INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
				252	false, true)
				253
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	254	FunctionPass *llvm::createDivergenceAnalysisPass() {
				255	return new DivergenceAnalysis();
				256	}
				257
Marcello Maggioni	ab58c74	2015-09-21 17:58:14 +0000	[diff] [blame]	258	void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
				259	AU.addRequired<DominatorTreeWrapperPass>();
				260	AU.addRequired<PostDominatorTree>();
				261	AU.setPreservesAll();
				262	}
				263
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	264	bool DivergenceAnalysis::runOnFunction(Function &F) {
				265	auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
				266	if (TTIWP == nullptr)
				267	return false;
				268
				269	TargetTransformInfo &TTI = TTIWP->getTTI(F);
				270	// Fast path: if the target does not have branch divergence, we do not mark
				271	// any branch as divergent.
				272	if (!TTI.hasBranchDivergence())
				273	return false;
				274
				275	DivergentValues.clear();
				276	DivergencePropagator DP(F, TTI,
				277	getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
				278	getAnalysis<PostDominatorTree>(), DivergentValues);
				279	DP.populateWithSourcesOfDivergence();
				280	DP.propagate();
				281	return false;
				282	}
				283
				284	void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
				285	if (DivergentValues.empty())
				286	return;
				287	const Value FirstDivergentValue = DivergentValues.begin();
				288	const Function *F;
				289	if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
				290	F = Arg->getParent();
				291	} else if (const Instruction *I =
				292	dyn_cast<Instruction>(FirstDivergentValue)) {
				293	F = I->getParent()->getParent();
				294	} else {
				295	llvm_unreachable("Only arguments and instructions can be divergent");
				296	}
				297
				298	// Dumps all divergent values in F, arguments and then instructions.
				299	for (auto &Arg : F->args()) {
				300	if (DivergentValues.count(&Arg))
				301	OS << "DIVERGENT: " << Arg << "\n";
				302	}
Nico Rieck	7819951	2015-08-06 19:10:45 +0000	[diff] [blame]	303	// Iterate instructions using instructions() to ensure a deterministic order.
				304	for (auto &I : instructions(F)) {
Jingyue Wu	5da831c	2015-04-10 05:03:50 +0000	[diff] [blame]	305	if (DivergentValues.count(&I))
				306	OS << "DIVERGENT:" << I << "\n";
				307	}
				308	}