| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 1 | //===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements a Union-find algorithm to compute Minimum Spanning Tree |
| 11 | // for a given CFG. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| Sam Clegg | fd5ab25 | 2017-07-12 20:49:21 +0000 | [diff] [blame] | 15 | #ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H |
| 16 | #define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H |
| 17 | |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 18 | #include "llvm/ADT/DenseMap.h" |
| 19 | #include "llvm/ADT/STLExtras.h" |
| 20 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 21 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
| 22 | #include "llvm/Analysis/CFG.h" |
| 23 | #include "llvm/Support/BranchProbability.h" |
| 24 | #include "llvm/Support/Debug.h" |
| 25 | #include "llvm/Support/raw_ostream.h" |
| 26 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 27 | #include <utility> |
| 28 | #include <vector> |
| 29 | |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 30 | #define DEBUG_TYPE "cfgmst" |
| 31 | |
| Sam Clegg | fd5ab25 | 2017-07-12 20:49:21 +0000 | [diff] [blame] | 32 | namespace llvm { |
| 33 | |
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 34 | /// An union-find based Minimum Spanning Tree for CFG |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 35 | /// |
| 36 | /// Implements a Union-find algorithm to compute Minimum Spanning Tree |
| 37 | /// for a given CFG. |
| 38 | template <class Edge, class BBInfo> class CFGMST { |
| 39 | public: |
| 40 | Function &F; |
| 41 | |
| 42 | // Store all the edges in CFG. It may contain some stale edges |
| 43 | // when Removed is set. |
| 44 | std::vector<std::unique_ptr<Edge>> AllEdges; |
| 45 | |
| 46 | // This map records the auxiliary information for each BB. |
| 47 | DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos; |
| 48 | |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 49 | // Whehter the function has an exit block with no successors. |
| 50 | // (For function with an infinite loop, this block may be absent) |
| 51 | bool ExitBlockFound = false; |
| 52 | |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 53 | // Find the root group of the G and compress the path from G to the root. |
| 54 | BBInfo *findAndCompressGroup(BBInfo *G) { |
| 55 | if (G->Group != G) |
| 56 | G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group)); |
| 57 | return static_cast<BBInfo *>(G->Group); |
| 58 | } |
| 59 | |
| 60 | // Union BB1 and BB2 into the same group and return true. |
| 61 | // Returns false if BB1 and BB2 are already in the same group. |
| 62 | bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) { |
| 63 | BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1)); |
| 64 | BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2)); |
| 65 | |
| 66 | if (BB1G == BB2G) |
| 67 | return false; |
| 68 | |
| 69 | // Make the smaller rank tree a direct child or the root of high rank tree. |
| 70 | if (BB1G->Rank < BB2G->Rank) |
| 71 | BB1G->Group = BB2G; |
| 72 | else { |
| 73 | BB2G->Group = BB1G; |
| 74 | // If the ranks are the same, increment root of one tree by one. |
| 75 | if (BB1G->Rank == BB2G->Rank) |
| 76 | BB1G->Rank++; |
| 77 | } |
| 78 | return true; |
| 79 | } |
| 80 | |
| 81 | // Give BB, return the auxiliary information. |
| 82 | BBInfo &getBBInfo(const BasicBlock *BB) const { |
| 83 | auto It = BBInfos.find(BB); |
| 84 | assert(It->second.get() != nullptr); |
| 85 | return *It->second.get(); |
| 86 | } |
| 87 | |
| Rong Xu | a5b5745 | 2016-12-02 19:10:29 +0000 | [diff] [blame] | 88 | // Give BB, return the auxiliary information if it's available. |
| 89 | BBInfo *findBBInfo(const BasicBlock *BB) const { |
| 90 | auto It = BBInfos.find(BB); |
| 91 | if (It == BBInfos.end()) |
| 92 | return nullptr; |
| 93 | return It->second.get(); |
| 94 | } |
| 95 | |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 96 | // Traverse the CFG using a stack. Find all the edges and assign the weight. |
| 97 | // Edges with large weight will be put into MST first so they are less likely |
| 98 | // to be instrumented. |
| 99 | void buildEdges() { |
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 100 | LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 101 | |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 102 | const BasicBlock *Entry = &(F.getEntryBlock()); |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 103 | uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 104 | Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, |
| 105 | *ExitOutgoing = nullptr, *ExitIncoming = nullptr; |
| 106 | uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; |
| 107 | |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 108 | // Add a fake edge to the entry. |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 109 | EntryIncoming = &addEdge(nullptr, Entry, EntryWeight); |
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 110 | LLVM_DEBUG(dbgs() << " Edge: from fake node to " << Entry->getName() |
| 111 | << " w = " << EntryWeight << "\n"); |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 112 | |
| 113 | // Special handling for single BB functions. |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 114 | if (succ_empty(Entry)) { |
| 115 | addEdge(Entry, nullptr, EntryWeight); |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 116 | return; |
| 117 | } |
| 118 | |
| 119 | static const uint32_t CriticalEdgeMultiplier = 1000; |
| 120 | |
| 121 | for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { |
| Chandler Carruth | edb12a8 | 2018-10-15 10:04:59 +0000 | [diff] [blame] | 122 | Instruction *TI = BB->getTerminator(); |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 123 | uint64_t BBWeight = |
| 124 | (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2); |
| 125 | uint64_t Weight = 2; |
| 126 | if (int successors = TI->getNumSuccessors()) { |
| 127 | for (int i = 0; i != successors; ++i) { |
| 128 | BasicBlock *TargetBB = TI->getSuccessor(i); |
| 129 | bool Critical = isCriticalEdge(TI, i); |
| 130 | uint64_t scaleFactor = BBWeight; |
| 131 | if (Critical) { |
| 132 | if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) |
| 133 | scaleFactor *= CriticalEdgeMultiplier; |
| 134 | else |
| 135 | scaleFactor = UINT64_MAX; |
| 136 | } |
| 137 | if (BPI != nullptr) |
| 138 | Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 139 | auto *E = &addEdge(&*BB, TargetBB, Weight); |
| 140 | E->IsCritical = Critical; |
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 141 | LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to " |
| 142 | << TargetBB->getName() << " w=" << Weight << "\n"); |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 143 | |
| 144 | // Keep track of entry/exit edges: |
| 145 | if (&*BB == Entry) { |
| 146 | if (Weight > MaxEntryOutWeight) { |
| 147 | MaxEntryOutWeight = Weight; |
| 148 | EntryOutgoing = E; |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | auto *TargetTI = TargetBB->getTerminator(); |
| 153 | if (TargetTI && !TargetTI->getNumSuccessors()) { |
| 154 | if (Weight > MaxExitInWeight) { |
| 155 | MaxExitInWeight = Weight; |
| 156 | ExitIncoming = E; |
| 157 | } |
| 158 | } |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 159 | } |
| 160 | } else { |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 161 | ExitBlockFound = true; |
| 162 | Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight); |
| 163 | if (BBWeight > MaxExitOutWeight) { |
| 164 | MaxExitOutWeight = BBWeight; |
| 165 | ExitOutgoing = ExitO; |
| 166 | } |
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 167 | LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to fake exit" |
| 168 | << " w = " << BBWeight << "\n"); |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 169 | } |
| 170 | } |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 171 | |
| 172 | // Entry/exit edge adjustment heurisitic: |
| 173 | // prefer instrumenting entry edge over exit edge |
| 174 | // if possible. Those exit edges may never have a chance to be |
| 175 | // executed (for instance the program is an event handling loop) |
| 176 | // before the profile is asynchronously dumped. |
| 177 | // |
| 178 | // If EntryIncoming and ExitOutgoing has similar weight, make sure |
| 179 | // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing |
| 180 | // and ExitIncoming has similar weight, make sure ExitIncoming becomes |
| 181 | // the min-edge. |
| 182 | uint64_t EntryInWeight = EntryWeight; |
| 183 | |
| 184 | if (EntryInWeight >= MaxExitOutWeight && |
| 185 | EntryInWeight * 2 < MaxExitOutWeight * 3) { |
| 186 | EntryIncoming->Weight = MaxExitOutWeight; |
| 187 | ExitOutgoing->Weight = EntryInWeight + 1; |
| 188 | } |
| 189 | |
| 190 | if (MaxEntryOutWeight >= MaxExitInWeight && |
| 191 | MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { |
| 192 | EntryOutgoing->Weight = MaxExitInWeight; |
| 193 | ExitIncoming->Weight = MaxEntryOutWeight + 1; |
| 194 | } |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 195 | } |
| 196 | |
| 197 | // Sort CFG edges based on its weight. |
| 198 | void sortEdgesByWeight() { |
| 199 | std::stable_sort(AllEdges.begin(), AllEdges.end(), |
| 200 | [](const std::unique_ptr<Edge> &Edge1, |
| 201 | const std::unique_ptr<Edge> &Edge2) { |
| 202 | return Edge1->Weight > Edge2->Weight; |
| 203 | }); |
| 204 | } |
| 205 | |
| 206 | // Traverse all the edges and compute the Minimum Weight Spanning Tree |
| 207 | // using union-find algorithm. |
| 208 | void computeMinimumSpanningTree() { |
| 209 | // First, put all the critical edge with landing-pad as the Dest to MST. |
| 210 | // This works around the insufficient support of critical edges split |
| 211 | // when destination BB is a landing pad. |
| 212 | for (auto &Ei : AllEdges) { |
| 213 | if (Ei->Removed) |
| 214 | continue; |
| 215 | if (Ei->IsCritical) { |
| 216 | if (Ei->DestBB && Ei->DestBB->isLandingPad()) { |
| 217 | if (unionGroups(Ei->SrcBB, Ei->DestBB)) |
| 218 | Ei->InMST = true; |
| 219 | } |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | for (auto &Ei : AllEdges) { |
| 224 | if (Ei->Removed) |
| 225 | continue; |
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 226 | // If we detect infinite loops, force |
| 227 | // instrumenting the entry edge: |
| 228 | if (!ExitBlockFound && Ei->SrcBB == nullptr) |
| 229 | continue; |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 230 | if (unionGroups(Ei->SrcBB, Ei->DestBB)) |
| 231 | Ei->InMST = true; |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | // Dump the Debug information about the instrumentation. |
| 236 | void dumpEdges(raw_ostream &OS, const Twine &Message) const { |
| 237 | if (!Message.str().empty()) |
| 238 | OS << Message << "\n"; |
| 239 | OS << " Number of Basic Blocks: " << BBInfos.size() << "\n"; |
| 240 | for (auto &BI : BBInfos) { |
| 241 | const BasicBlock *BB = BI.first; |
| 242 | OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " " |
| 243 | << BI.second->infoString() << "\n"; |
| 244 | } |
| 245 | |
| 246 | OS << " Number of Edges: " << AllEdges.size() |
| 247 | << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; |
| 248 | uint32_t Count = 0; |
| 249 | for (auto &EI : AllEdges) |
| 250 | OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" |
| 251 | << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; |
| 252 | } |
| 253 | |
| 254 | // Add an edge to AllEdges with weight W. |
| 255 | Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { |
| 256 | uint32_t Index = BBInfos.size(); |
| 257 | auto Iter = BBInfos.end(); |
| 258 | bool Inserted; |
| 259 | std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); |
| 260 | if (Inserted) { |
| 261 | // Newly inserted, update the real info. |
| 262 | Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); |
| 263 | Index++; |
| 264 | } |
| 265 | std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); |
| 266 | if (Inserted) |
| 267 | // Newly inserted, update the real info. |
| 268 | Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); |
| 269 | AllEdges.emplace_back(new Edge(Src, Dest, W)); |
| 270 | return *AllEdges.back(); |
| 271 | } |
| 272 | |
| Xinliang David Li | d91057b | 2017-12-08 19:38:07 +0000 | [diff] [blame] | 273 | BranchProbabilityInfo *BPI; |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 274 | BlockFrequencyInfo *BFI; |
| 275 | |
| 276 | public: |
| Xinliang David Li | d91057b | 2017-12-08 19:38:07 +0000 | [diff] [blame] | 277 | CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, |
| 278 | BlockFrequencyInfo *BFI_ = nullptr) |
| 279 | : F(Func), BPI(BPI_), BFI(BFI_) { |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 280 | buildEdges(); |
| 281 | sortEdgesByWeight(); |
| 282 | computeMinimumSpanningTree(); |
| 283 | } |
| 284 | }; |
| 285 | |
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 286 | } // end namespace llvm |
| Sam Clegg | fd5ab25 | 2017-07-12 20:49:21 +0000 | [diff] [blame] | 287 | |
| 288 | #undef DEBUG_TYPE // "cfgmst" |
| 289 | |
| 290 | #endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H |