blob: 54b4bb3113baf5bc0e86870e20340ffc4f712db4 [file] [log] [blame]
Nicolai Haehnle59041682018-10-18 09:38:44 +00001//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
2//--===//
3//
Chandler Carruth2946cd72019-01-19 08:50:56 +00004// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Nicolai Haehnle59041682018-10-18 09:38:44 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements an algorithm that returns for a divergent branch
11// the set of basic blocks whose phi nodes become divergent due to divergent
12// control. These are the blocks that are reachable by two disjoint paths from
13// the branch or loop exits that have a reaching path that is disjoint from a
14// path to the loop latch.
15//
16// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
17// control-induced divergence in phi nodes.
18//
19// -- Summary --
20// The SyncDependenceAnalysis lazily computes sync dependences [3].
21// The analysis evaluates the disjoint path criterion [2] by a reduction
22// to SSA construction. The SSA construction algorithm is implemented as
23// a simple data-flow analysis [1].
24//
25// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
26// [2] "Efficiently Computing Static Single Assignment Form
27// and the Control Dependence Graph", TOPLAS '91,
28// Cytron, Ferrante, Rosen, Wegman and Zadeck
29// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
30// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
31//
32// -- Sync dependence --
33// Sync dependence [4] characterizes the control flow aspect of the
34// propagation of branch divergence. For example,
35//
36// %cond = icmp slt i32 %tid, 10
37// br i1 %cond, label %then, label %else
38// then:
39// br label %merge
40// else:
41// br label %merge
42// merge:
43// %a = phi i32 [ 0, %then ], [ 1, %else ]
44//
45// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
46// because %tid is not on its use-def chains, %a is sync dependent on %tid
47// because the branch "br i1 %cond" depends on %tid and affects which value %a
48// is assigned to.
49//
50// -- Reduction to SSA construction --
51// There are two disjoint paths from A to X, if a certain variant of SSA
52// construction places a phi node in X under the following set-up scheme [2].
53//
54// This variant of SSA construction ignores incoming undef values.
55// That is paths from the entry without a definition do not result in
56// phi nodes.
57//
58// entry
59// / \
60// A \
61// / \ Y
62// B C /
63// \ / \ /
64// D E
65// \ /
66// F
67// Assume that A contains a divergent branch. We are interested
68// in the set of all blocks where each block is reachable from A
69// via two disjoint paths. This would be the set {D, F} in this
70// case.
71// To generally reduce this query to SSA construction we introduce
72// a virtual variable x and assign to x different values in each
73// successor block of A.
74// entry
75// / \
76// A \
77// / \ Y
78// x = 0 x = 1 /
79// \ / \ /
80// D E
81// \ /
82// F
83// Our flavor of SSA construction for x will construct the following
84// entry
85// / \
86// A \
87// / \ Y
88// x0 = 0 x1 = 1 /
89// \ / \ /
90// x2=phi E
91// \ /
92// x3=phi
93// The blocks D and F contain phi nodes and are thus each reachable
94// by two disjoins paths from A.
95//
96// -- Remarks --
97// In case of loop exits we need to check the disjoint path criterion for loops
98// [2]. To this end, we check whether the definition of x differs between the
99// loop exit and the loop header (_after_ SSA construction).
100//
101//===----------------------------------------------------------------------===//
102#include "llvm/ADT/PostOrderIterator.h"
103#include "llvm/ADT/SmallPtrSet.h"
104#include "llvm/Analysis/PostDominators.h"
105#include "llvm/Analysis/SyncDependenceAnalysis.h"
106#include "llvm/IR/BasicBlock.h"
107#include "llvm/IR/CFG.h"
108#include "llvm/IR/Dominators.h"
109#include "llvm/IR/Function.h"
110
111#include <stack>
112#include <unordered_set>
113
114#define DEBUG_TYPE "sync-dependence"
115
116namespace llvm {
117
118ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
119
120SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
121 const PostDominatorTree &PDT,
122 const LoopInfo &LI)
123 : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
124
125SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
126
127using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
128
129// divergence propagator for reducible CFGs
130struct DivergencePropagator {
131 const FunctionRPOT &FuncRPOT;
132 const DominatorTree &DT;
133 const PostDominatorTree &PDT;
134 const LoopInfo &LI;
135
136 // identified join points
137 std::unique_ptr<ConstBlockSet> JoinBlocks;
138
139 // reached loop exits (by a path disjoint to a path to the loop header)
140 SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
141
142 // if DefMap[B] == C then C is the dominating definition at block B
143 // if DefMap[B] ~ undef then we haven't seen B yet
144 // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
145 // an immediate successor of X (initial value).
146 using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
147 DefiningBlockMap DefMap;
148
149 // all blocks with pending visits
150 std::unordered_set<const BasicBlock *> PendingUpdates;
151
152 DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
153 const PostDominatorTree &PDT, const LoopInfo &LI)
154 : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
155 JoinBlocks(new ConstBlockSet) {}
156
157 // set the definition at @block and mark @block as pending for a visit
158 void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
159 bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
160 if (WasAdded)
161 PendingUpdates.insert(&Block);
162 }
163
164 void printDefs(raw_ostream &Out) {
165 Out << "Propagator::DefMap {\n";
166 for (const auto *Block : FuncRPOT) {
167 auto It = DefMap.find(Block);
168 Out << Block->getName() << " : ";
169 if (It == DefMap.end()) {
170 Out << "\n";
171 } else {
172 const auto *DefBlock = It->second;
173 Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
174 }
175 }
176 Out << "}\n";
177 }
178
179 // process @succBlock with reaching definition @defBlock
180 // the original divergent branch was in @parentLoop (if any)
181 void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
182 const BasicBlock &DefBlock) {
183
184 // @succBlock is a loop exit
185 if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
186 DefMap.emplace(&SuccBlock, &DefBlock);
187 ReachedLoopExits.insert(&SuccBlock);
188 return;
189 }
190
191 // first reaching def?
192 auto ItLastDef = DefMap.find(&SuccBlock);
193 if (ItLastDef == DefMap.end()) {
194 addPending(SuccBlock, DefBlock);
195 return;
196 }
197
198 // a join of at least two definitions
199 if (ItLastDef->second != &DefBlock) {
200 // do we know this join already?
201 if (!JoinBlocks->insert(&SuccBlock).second)
202 return;
203
204 // update the definition
205 addPending(SuccBlock, SuccBlock);
206 }
207 }
208
209 // find all blocks reachable by two disjoint paths from @rootTerm.
Chandler Carruth608e6fa2018-10-19 00:22:10 +0000210 // This method works for both divergent terminators and loops with
Nicolai Haehnle59041682018-10-18 09:38:44 +0000211 // divergent exits.
212 // @rootBlock is either the block containing the branch or the header of the
213 // divergent loop.
214 // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
215 // headed by @rootBlock.
216 // @parentLoop is the parent loop of the Loop or the loop that contains the
217 // Terminator.
218 template <typename SuccessorIterable>
219 std::unique_ptr<ConstBlockSet>
220 computeJoinPoints(const BasicBlock &RootBlock,
221 SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
222 assert(JoinBlocks);
223
224 // immediate post dominator (no join block beyond that block)
225 const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
226 const auto *IpdNode = PdNode->getIDom();
227 const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
228
229 // bootstrap with branch targets
230 for (const auto *SuccBlock : NodeSuccessors) {
231 DefMap.emplace(SuccBlock, SuccBlock);
232
233 if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
234 // immediate loop exit from node.
235 ReachedLoopExits.insert(SuccBlock);
236 continue;
237 } else {
238 // regular successor
239 PendingUpdates.insert(SuccBlock);
240 }
241 }
242
243 auto ItBeginRPO = FuncRPOT.begin();
244
245 // skip until term (TODO RPOT won't let us start at @term directly)
246 for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
247
248 auto ItEndRPO = FuncRPOT.end();
249 assert(ItBeginRPO != ItEndRPO);
250
251 // propagate definitions at the immediate successors of the node in RPO
252 auto ItBlockRPO = ItBeginRPO;
253 while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
254 const auto *Block = *ItBlockRPO;
255
256 // skip @block if not pending update
257 auto ItPending = PendingUpdates.find(Block);
258 if (ItPending == PendingUpdates.end())
259 continue;
260 PendingUpdates.erase(ItPending);
261
262 // propagate definition at @block to its successors
263 auto ItDef = DefMap.find(Block);
264 const auto *DefBlock = ItDef->second;
265 assert(DefBlock);
266
267 auto *BlockLoop = LI.getLoopFor(Block);
268 if (ParentLoop &&
269 (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
270 // if the successor is the header of a nested loop pretend its a
271 // single node with the loop's exits as successors
272 SmallVector<BasicBlock *, 4> BlockLoopExits;
273 BlockLoop->getExitBlocks(BlockLoopExits);
274 for (const auto *BlockLoopExit : BlockLoopExits) {
275 visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
276 }
277
278 } else {
279 // the successors are either on the same loop level or loop exits
280 for (const auto *SuccBlock : successors(Block)) {
281 visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
282 }
283 }
284 }
285
286 // We need to know the definition at the parent loop header to decide
287 // whether the definition at the header is different from the definition at
288 // the loop exits, which would indicate a divergent loop exits.
289 //
290 // A // loop header
291 // |
292 // B // nested loop header
293 // |
294 // C -> X (exit from B loop) -..-> (A latch)
295 // |
296 // D -> back to B (B latch)
297 // |
298 // proper exit from both loops
299 //
300 // D post-dominates B as it is the only proper exit from the "A loop".
301 // If C has a divergent branch, propagation will therefore stop at D.
302 // That implies that B will never receive a definition.
303 // But that definition can only be the same as at D (D itself in thise case)
304 // because all paths to anywhere have to pass through D.
305 //
306 const BasicBlock *ParentLoopHeader =
307 ParentLoop ? ParentLoop->getHeader() : nullptr;
308 if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
309 DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
310 }
311
312 // analyze reached loop exits
313 if (!ReachedLoopExits.empty()) {
314 assert(ParentLoop);
315 const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
316 LLVM_DEBUG(printDefs(dbgs()));
317 assert(HeaderDefBlock && "no definition in header of carrying loop");
318
319 for (const auto *ExitBlock : ReachedLoopExits) {
320 auto ItExitDef = DefMap.find(ExitBlock);
321 assert((ItExitDef != DefMap.end()) &&
322 "no reaching def at reachable loop exit");
323 if (ItExitDef->second != HeaderDefBlock) {
324 JoinBlocks->insert(ExitBlock);
325 }
326 }
327 }
328
329 return std::move(JoinBlocks);
330 }
331};
332
333const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
334 using LoopExitVec = SmallVector<BasicBlock *, 4>;
335 LoopExitVec LoopExits;
336 Loop.getExitBlocks(LoopExits);
337 if (LoopExits.size() < 1) {
338 return EmptyBlockSet;
339 }
340
341 // already available in cache?
342 auto ItCached = CachedLoopExitJoins.find(&Loop);
343 if (ItCached != CachedLoopExitJoins.end())
344 return *ItCached->second;
345
346 // compute all join points
347 DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
348 auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
349 *Loop.getHeader(), LoopExits, Loop.getParentLoop());
350
351 auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
352 assert(ItInserted.second);
353 return *ItInserted.first->second;
354}
355
356const ConstBlockSet &
Chandler Carruth608e6fa2018-10-19 00:22:10 +0000357SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
Nicolai Haehnle59041682018-10-18 09:38:44 +0000358 // trivial case
359 if (Term.getNumSuccessors() < 1) {
360 return EmptyBlockSet;
361 }
362
363 // already available in cache?
364 auto ItCached = CachedBranchJoins.find(&Term);
365 if (ItCached != CachedBranchJoins.end())
366 return *ItCached->second;
367
368 // compute all join points
369 DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
370 const auto &TermBlock = *Term.getParent();
371 auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
372 TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
373
374 auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
375 assert(ItInserted.second);
376 return *ItInserted.first->second;
377}
378
379} // namespace llvm