Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 1 | //===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis |
| 2 | //Implementation -==// |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 3 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | // See https://llvm.org/LICENSE.txt for license information. |
| 6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 10 | // This file implements divergence analysis which determines whether a branch |
| 11 | // in a GPU program is divergent.It can help branch optimizations such as jump |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 12 | // threading and loop unswitching to make better decisions. |
| 13 | // |
| 14 | // GPU programs typically use the SIMD execution model, where multiple threads |
| 15 | // in the same execution group have to execute in lock-step. Therefore, if the |
| 16 | // code contains divergent branches (i.e., threads in a group do not agree on |
| 17 | // which path of the branch to take), the group of threads has to execute all |
| 18 | // the paths from that branch with different subsets of threads enabled until |
| 19 | // they converge at the immediately post-dominating BB of the paths. |
| 20 | // |
| 21 | // Due to this execution model, some optimizations such as jump |
| 22 | // threading and loop unswitching can be unfortunately harmful when performed on |
| 23 | // divergent branches. Therefore, an analysis that computes which branches in a |
| 24 | // GPU program are divergent can help the compiler to selectively run these |
| 25 | // optimizations. |
| 26 | // |
| 27 | // This file defines divergence analysis which computes a conservative but |
| 28 | // non-trivial approximation of all divergent branches in a GPU program. It |
| 29 | // partially implements the approach described in |
| 30 | // |
| 31 | // Divergence Analysis |
| 32 | // Sampaio, Souza, Collange, Pereira |
| 33 | // TOPLAS '13 |
| 34 | // |
| 35 | // The divergence analysis identifies the sources of divergence (e.g., special |
| 36 | // variables that hold the thread ID), and recursively marks variables that are |
| 37 | // data or sync dependent on a source of divergence as divergent. |
| 38 | // |
| 39 | // While data dependency is a well-known concept, the notion of sync dependency |
| 40 | // is worth more explanation. Sync dependence characterizes the control flow |
| 41 | // aspect of the propagation of branch divergence. For example, |
| 42 | // |
| 43 | // %cond = icmp slt i32 %tid, 10 |
| 44 | // br i1 %cond, label %then, label %else |
| 45 | // then: |
| 46 | // br label %merge |
| 47 | // else: |
| 48 | // br label %merge |
| 49 | // merge: |
| 50 | // %a = phi i32 [ 0, %then ], [ 1, %else ] |
| 51 | // |
| 52 | // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid |
| 53 | // because %tid is not on its use-def chains, %a is sync dependent on %tid |
| 54 | // because the branch "br i1 %cond" depends on %tid and affects which value %a |
| 55 | // is assigned to. |
| 56 | // |
| 57 | // The current implementation has the following limitations: |
| 58 | // 1. intra-procedural. It conservatively considers the arguments of a |
| 59 | // non-kernel-entry function and the return value of a function call as |
| 60 | // divergent. |
| 61 | // 2. memory as black box. It conservatively considers values loaded from |
| 62 | // generic or local address as divergent. This can be improved by leveraging |
| 63 | // pointer analysis. |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 64 | // |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 65 | //===----------------------------------------------------------------------===// |
| 66 | |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 67 | #include "llvm/ADT/PostOrderIterator.h" |
| 68 | #include "llvm/Analysis/CFG.h" |
| 69 | #include "llvm/Analysis/DivergenceAnalysis.h" |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 70 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 71 | #include "llvm/Analysis/Passes.h" |
| 72 | #include "llvm/Analysis/PostDominators.h" |
| 73 | #include "llvm/Analysis/TargetTransformInfo.h" |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 74 | #include "llvm/IR/Dominators.h" |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 75 | #include "llvm/IR/InstIterator.h" |
| 76 | #include "llvm/IR/Instructions.h" |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 77 | #include "llvm/IR/Value.h" |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 78 | #include "llvm/Support/Debug.h" |
| 79 | #include "llvm/Support/raw_ostream.h" |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 80 | #include <vector> |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 81 | using namespace llvm; |
| 82 | |
Tim Renouf | f3d8295 | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 83 | #define DEBUG_TYPE "divergence" |
| 84 | |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 85 | // transparently use the GPUDivergenceAnalysis |
| 86 | static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false), |
| 87 | cl::Hidden, |
| 88 | cl::desc("turn the LegacyDivergenceAnalysis into " |
| 89 | "a wrapper for GPUDivergenceAnalysis")); |
| 90 | |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 91 | namespace { |
| 92 | |
| 93 | class DivergencePropagator { |
| 94 | public: |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 95 | DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 96 | PostDominatorTree &PDT, DenseSet<const Value *> &DV, |
| 97 | DenseSet<const Use *> &DU) |
| 98 | : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {} |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 99 | void populateWithSourcesOfDivergence(); |
| 100 | void propagate(); |
| 101 | |
| 102 | private: |
| 103 | // A helper function that explores data dependents of V. |
| 104 | void exploreDataDependency(Value *V); |
| 105 | // A helper function that explores sync dependents of TI. |
Chandler Carruth | 9871662 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 106 | void exploreSyncDependency(Instruction *TI); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 107 | // Computes the influence region from Start to End. This region includes all |
Jingyue Wu | 3f42228 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 108 | // basic blocks on any simple path from Start to End. |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 109 | void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, |
| 110 | DenseSet<BasicBlock *> &InfluenceRegion); |
| 111 | // Finds all users of I that are outside the influence region, and add these |
| 112 | // users to Worklist. |
| 113 | void findUsersOutsideInfluenceRegion( |
| 114 | Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion); |
| 115 | |
| 116 | Function &F; |
| 117 | TargetTransformInfo &TTI; |
| 118 | DominatorTree &DT; |
| 119 | PostDominatorTree &PDT; |
| 120 | std::vector<Value *> Worklist; // Stack for DFS. |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 121 | DenseSet<const Value *> &DV; // Stores all divergent values. |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 122 | DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform |
| 123 | // values. |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 124 | }; |
| 125 | |
| 126 | void DivergencePropagator::populateWithSourcesOfDivergence() { |
| 127 | Worklist.clear(); |
| 128 | DV.clear(); |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 129 | DU.clear(); |
Nico Rieck | 7819951 | 2015-08-06 19:10:45 +0000 | [diff] [blame] | 130 | for (auto &I : instructions(F)) { |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 131 | if (TTI.isSourceOfDivergence(&I)) { |
| 132 | Worklist.push_back(&I); |
| 133 | DV.insert(&I); |
| 134 | } |
| 135 | } |
| 136 | for (auto &Arg : F.args()) { |
| 137 | if (TTI.isSourceOfDivergence(&Arg)) { |
| 138 | Worklist.push_back(&Arg); |
| 139 | DV.insert(&Arg); |
| 140 | } |
| 141 | } |
| 142 | } |
| 143 | |
Chandler Carruth | 9871662 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 144 | void DivergencePropagator::exploreSyncDependency(Instruction *TI) { |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 145 | // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's |
| 146 | // immediate post dominator are divergent. This rule handles if-then-else |
| 147 | // patterns. For example, |
| 148 | // |
| 149 | // if (tid < 5) |
| 150 | // a1 = 1; |
| 151 | // else |
| 152 | // a2 = 2; |
| 153 | // a = phi(a1, a2); // sync dependent on (tid < 5) |
| 154 | BasicBlock *ThisBB = TI->getParent(); |
Matt Arsenault | 790eb1c | 2016-04-29 06:17:47 +0000 | [diff] [blame] | 155 | |
| 156 | // Unreachable blocks may not be in the dominator tree. |
| 157 | if (!DT.isReachableFromEntry(ThisBB)) |
| 158 | return; |
| 159 | |
Matt Arsenault | 1af53a9 | 2016-05-09 16:57:08 +0000 | [diff] [blame] | 160 | // If the function has no exit blocks or doesn't reach any exit blocks, the |
| 161 | // post dominator may be null. |
| 162 | DomTreeNode *ThisNode = PDT.getNode(ThisBB); |
| 163 | if (!ThisNode) |
| 164 | return; |
| 165 | |
| 166 | BasicBlock *IPostDom = ThisNode->getIDom()->getBlock(); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 167 | if (IPostDom == nullptr) |
| 168 | return; |
| 169 | |
| 170 | for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) { |
| 171 | // A PHINode is uniform if it returns the same value no matter which path is |
| 172 | // taken. |
Nicolai Haehnle | 13d90f3 | 2016-04-14 17:42:47 +0000 | [diff] [blame] | 173 | if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second) |
Duncan P. N. Exon Smith | 5a82c91 | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 174 | Worklist.push_back(&*I); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 175 | } |
| 176 | |
| 177 | // Propagation rule 2: if a value defined in a loop is used outside, the user |
| 178 | // is sync dependent on the condition of the loop exits that dominate the |
| 179 | // user. For example, |
| 180 | // |
| 181 | // int i = 0; |
| 182 | // do { |
| 183 | // i++; |
| 184 | // if (foo(i)) ... // uniform |
| 185 | // } while (i < tid); |
| 186 | // if (bar(i)) ... // divergent |
| 187 | // |
| 188 | // A program may contain unstructured loops. Therefore, we cannot leverage |
| 189 | // LoopInfo, which only recognizes natural loops. |
| 190 | // |
| 191 | // The algorithm used here handles both natural and unstructured loops. Given |
| 192 | // a branch TI, we first compute its influence region, the union of all simple |
| 193 | // paths from TI to its immediate post dominator (IPostDom). Then, we search |
| 194 | // for all the values defined in the influence region but used outside. All |
| 195 | // these users are sync dependent on TI. |
| 196 | DenseSet<BasicBlock *> InfluenceRegion; |
| 197 | computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion); |
| 198 | // An insight that can speed up the search process is that all the in-region |
| 199 | // values that are used outside must dominate TI. Therefore, instead of |
| 200 | // searching every basic blocks in the influence region, we search all the |
| 201 | // dominators of TI until it is outside the influence region. |
| 202 | BasicBlock *InfluencedBB = ThisBB; |
| 203 | while (InfluenceRegion.count(InfluencedBB)) { |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 204 | for (auto &I : *InfluencedBB) { |
| 205 | if (!DV.count(&I)) |
| 206 | findUsersOutsideInfluenceRegion(I, InfluenceRegion); |
| 207 | } |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 208 | DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); |
| 209 | if (IDomNode == nullptr) |
| 210 | break; |
| 211 | InfluencedBB = IDomNode->getBlock(); |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | void DivergencePropagator::findUsersOutsideInfluenceRegion( |
| 216 | Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 217 | for (Use &Use : I.uses()) { |
| 218 | Instruction *UserInst = cast<Instruction>(Use.getUser()); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 219 | if (!InfluenceRegion.count(UserInst->getParent())) { |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 220 | DU.insert(&Use); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 221 | if (DV.insert(UserInst).second) |
| 222 | Worklist.push_back(UserInst); |
| 223 | } |
| 224 | } |
| 225 | } |
| 226 | |
Jingyue Wu | 3f42228 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 227 | // A helper function for computeInfluenceRegion that adds successors of "ThisBB" |
| 228 | // to the influence region. |
| 229 | static void |
| 230 | addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End, |
| 231 | DenseSet<BasicBlock *> &InfluenceRegion, |
| 232 | std::vector<BasicBlock *> &InfluenceStack) { |
| 233 | for (BasicBlock *Succ : successors(ThisBB)) { |
| 234 | if (Succ != End && InfluenceRegion.insert(Succ).second) |
| 235 | InfluenceStack.push_back(Succ); |
| 236 | } |
| 237 | } |
| 238 | |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 239 | void DivergencePropagator::computeInfluenceRegion( |
| 240 | BasicBlock *Start, BasicBlock *End, |
| 241 | DenseSet<BasicBlock *> &InfluenceRegion) { |
| 242 | assert(PDT.properlyDominates(End, Start) && |
| 243 | "End does not properly dominate Start"); |
Jingyue Wu | 3f42228 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 244 | |
| 245 | // The influence region starts from the end of "Start" to the beginning of |
| 246 | // "End". Therefore, "Start" should not be in the region unless "Start" is in |
| 247 | // a loop that doesn't contain "End". |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 248 | std::vector<BasicBlock *> InfluenceStack; |
Jingyue Wu | 3f42228 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 249 | addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 250 | while (!InfluenceStack.empty()) { |
| 251 | BasicBlock *BB = InfluenceStack.back(); |
| 252 | InfluenceStack.pop_back(); |
Jingyue Wu | 3f42228 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 253 | addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 254 | } |
| 255 | } |
| 256 | |
| 257 | void DivergencePropagator::exploreDataDependency(Value *V) { |
| 258 | // Follow def-use chains of V. |
| 259 | for (User *U : V->users()) { |
Jay Foad | c38188c | 2019-10-02 08:56:33 +0000 | [diff] [blame] | 260 | if (!TTI.isAlwaysUniform(U) && DV.insert(U).second) |
| 261 | Worklist.push_back(U); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 262 | } |
| 263 | } |
| 264 | |
| 265 | void DivergencePropagator::propagate() { |
| 266 | // Traverse the dependency graph using DFS. |
| 267 | while (!Worklist.empty()) { |
| 268 | Value *V = Worklist.back(); |
| 269 | Worklist.pop_back(); |
Chandler Carruth | 9871662 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 270 | if (Instruction *I = dyn_cast<Instruction>(V)) { |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 271 | // Terminators with less than two successors won't introduce sync |
| 272 | // dependency. Ignore them. |
Chandler Carruth | 9871662 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 273 | if (I->isTerminator() && I->getNumSuccessors() > 1) |
| 274 | exploreSyncDependency(I); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 275 | } |
| 276 | exploreDataDependency(V); |
| 277 | } |
| 278 | } |
| 279 | |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 280 | } // namespace |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 281 | |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 282 | // Register this pass. |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 283 | char LegacyDivergenceAnalysis::ID = 0; |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 284 | INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", |
| 285 | "Legacy Divergence Analysis", false, true) |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 286 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
Hongbin Zheng | 3f97840 | 2016-02-25 17:54:07 +0000 | [diff] [blame] | 287 | INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 288 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 289 | INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence", |
| 290 | "Legacy Divergence Analysis", false, true) |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 291 | |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 292 | FunctionPass *llvm::createLegacyDivergenceAnalysisPass() { |
| 293 | return new LegacyDivergenceAnalysis(); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 294 | } |
| 295 | |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 296 | void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 297 | AU.addRequired<DominatorTreeWrapperPass>(); |
Hongbin Zheng | 3f97840 | 2016-02-25 17:54:07 +0000 | [diff] [blame] | 298 | AU.addRequired<PostDominatorTreeWrapperPass>(); |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 299 | if (UseGPUDA) |
| 300 | AU.addRequired<LoopInfoWrapperPass>(); |
Marcello Maggioni | ab58c74 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 301 | AU.setPreservesAll(); |
| 302 | } |
| 303 | |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 304 | bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis( |
| 305 | const Function &F) const { |
| 306 | if (!UseGPUDA) |
| 307 | return false; |
| 308 | |
| 309 | // GPUDivergenceAnalysis requires a reducible CFG. |
| 310 | auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 311 | using RPOTraversal = ReversePostOrderTraversal<const Function *>; |
| 312 | RPOTraversal FuncRPOT(&F); |
| 313 | return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, |
| 314 | const LoopInfo>(FuncRPOT, LI); |
| 315 | } |
| 316 | |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 317 | bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 318 | auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); |
| 319 | if (TTIWP == nullptr) |
| 320 | return false; |
| 321 | |
| 322 | TargetTransformInfo &TTI = TTIWP->getTTI(F); |
| 323 | // Fast path: if the target does not have branch divergence, we do not mark |
| 324 | // any branch as divergent. |
| 325 | if (!TTI.hasBranchDivergence()) |
| 326 | return false; |
| 327 | |
| 328 | DivergentValues.clear(); |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 329 | DivergentUses.clear(); |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 330 | gpuDA = nullptr; |
| 331 | |
| 332 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
Hongbin Zheng | 3f97840 | 2016-02-25 17:54:07 +0000 | [diff] [blame] | 333 | auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 334 | |
| 335 | if (shouldUseGPUDivergenceAnalysis(F)) { |
| 336 | // run the new GPU divergence analysis |
| 337 | auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
Jonas Devlieghere | 0eaee54 | 2019-08-15 15:54:37 +0000 | [diff] [blame] | 338 | gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 339 | |
| 340 | } else { |
| 341 | // run LLVM's existing DivergenceAnalysis |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 342 | DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 343 | DP.populateWithSourcesOfDivergence(); |
| 344 | DP.propagate(); |
| 345 | } |
| 346 | |
| 347 | LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName() |
| 348 | << ":\n"; |
| 349 | print(dbgs(), F.getParent())); |
| 350 | |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 351 | return false; |
| 352 | } |
| 353 | |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 354 | bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { |
| 355 | if (gpuDA) { |
| 356 | return gpuDA->isDivergent(*V); |
| 357 | } |
| 358 | return DivergentValues.count(V); |
| 359 | } |
| 360 | |
Jay Foad | dcb7532 | 2019-07-29 10:22:09 +0000 | [diff] [blame] | 361 | bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const { |
| 362 | if (gpuDA) { |
| 363 | return gpuDA->isDivergentUse(*U); |
| 364 | } |
| 365 | return DivergentValues.count(U->get()) || DivergentUses.count(U); |
| 366 | } |
| 367 | |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 368 | void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 369 | if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 370 | return; |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 371 | |
Nicolai Haehnle | 413f8691 | 2018-11-30 23:07:49 +0000 | [diff] [blame] | 372 | const Function *F = nullptr; |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 373 | if (!DivergentValues.empty()) { |
| 374 | const Value *FirstDivergentValue = *DivergentValues.begin(); |
| 375 | if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) { |
| 376 | F = Arg->getParent(); |
| 377 | } else if (const Instruction *I = |
| 378 | dyn_cast<Instruction>(FirstDivergentValue)) { |
| 379 | F = I->getParent()->getParent(); |
| 380 | } else { |
| 381 | llvm_unreachable("Only arguments and instructions can be divergent"); |
| 382 | } |
| 383 | } else if (gpuDA) { |
| 384 | F = &gpuDA->getFunction(); |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 385 | } |
Nicolai Haehnle | 413f8691 | 2018-11-30 23:07:49 +0000 | [diff] [blame] | 386 | if (!F) |
| 387 | return; |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 388 | |
| 389 | // Dumps all divergent values in F, arguments and then instructions. |
| 390 | for (auto &Arg : F->args()) { |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 391 | OS << (isDivergent(&Arg) ? "DIVERGENT: " : " "); |
Tim Renouf | f3d8295 | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 392 | OS << Arg << "\n"; |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 393 | } |
Nico Rieck | 7819951 | 2015-08-06 19:10:45 +0000 | [diff] [blame] | 394 | // Iterate instructions using instructions() to ensure a deterministic order. |
Tim Renouf | f3d8295 | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 395 | for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) { |
| 396 | auto &BB = *BI; |
| 397 | OS << "\n " << BB.getName() << ":\n"; |
| 398 | for (auto &I : BB.instructionsWithoutDebug()) { |
Nicolai Haehnle | 56d0ed2 | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 399 | OS << (isDivergent(&I) ? "DIVERGENT: " : " "); |
Tim Renouf | f3d8295 | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 400 | OS << I << "\n"; |
| 401 | } |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 402 | } |
Tim Renouf | f3d8295 | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 403 | OS << "\n"; |
Jingyue Wu | 5da831c | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 404 | } |