| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 1 | //===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===// | 
|  | 2 | // | 
| Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|  | 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 6 | // | 
|  | 7 | //===----------------------------------------------------------------------===// | 
|  | 8 | // | 
|  | 9 | // This file implements a Union-find algorithm to compute Minimum Spanning Tree | 
|  | 10 | // for a given CFG. | 
|  | 11 | // | 
|  | 12 | //===----------------------------------------------------------------------===// | 
|  | 13 |  | 
| Sam Clegg | fd5ab25 | 2017-07-12 20:49:21 +0000 | [diff] [blame] | 14 | #ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H | 
|  | 15 | #define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H | 
|  | 16 |  | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 17 | #include "llvm/ADT/DenseMap.h" | 
|  | 18 | #include "llvm/ADT/STLExtras.h" | 
|  | 19 | #include "llvm/Analysis/BlockFrequencyInfo.h" | 
|  | 20 | #include "llvm/Analysis/BranchProbabilityInfo.h" | 
|  | 21 | #include "llvm/Analysis/CFG.h" | 
|  | 22 | #include "llvm/Support/BranchProbability.h" | 
|  | 23 | #include "llvm/Support/Debug.h" | 
|  | 24 | #include "llvm/Support/raw_ostream.h" | 
|  | 25 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 26 | #include <utility> | 
|  | 27 | #include <vector> | 
|  | 28 |  | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 29 | #define DEBUG_TYPE "cfgmst" | 
|  | 30 |  | 
| Sam Clegg | fd5ab25 | 2017-07-12 20:49:21 +0000 | [diff] [blame] | 31 | namespace llvm { | 
|  | 32 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 33 | /// An union-find based Minimum Spanning Tree for CFG | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 34 | /// | 
|  | 35 | /// Implements a Union-find algorithm to compute Minimum Spanning Tree | 
|  | 36 | /// for a given CFG. | 
|  | 37 | template <class Edge, class BBInfo> class CFGMST { | 
|  | 38 | public: | 
|  | 39 | Function &F; | 
|  | 40 |  | 
|  | 41 | // Store all the edges in CFG. It may contain some stale edges | 
|  | 42 | // when Removed is set. | 
|  | 43 | std::vector<std::unique_ptr<Edge>> AllEdges; | 
|  | 44 |  | 
|  | 45 | // This map records the auxiliary information for each BB. | 
|  | 46 | DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos; | 
|  | 47 |  | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 48 | // Whehter the function has an exit block with no successors. | 
|  | 49 | // (For function with an infinite loop, this block may be absent) | 
|  | 50 | bool ExitBlockFound = false; | 
|  | 51 |  | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 52 | // Find the root group of the G and compress the path from G to the root. | 
|  | 53 | BBInfo *findAndCompressGroup(BBInfo *G) { | 
|  | 54 | if (G->Group != G) | 
|  | 55 | G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group)); | 
|  | 56 | return static_cast<BBInfo *>(G->Group); | 
|  | 57 | } | 
|  | 58 |  | 
|  | 59 | // Union BB1 and BB2 into the same group and return true. | 
|  | 60 | // Returns false if BB1 and BB2 are already in the same group. | 
|  | 61 | bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) { | 
|  | 62 | BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1)); | 
|  | 63 | BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2)); | 
|  | 64 |  | 
|  | 65 | if (BB1G == BB2G) | 
|  | 66 | return false; | 
|  | 67 |  | 
|  | 68 | // Make the smaller rank tree a direct child or the root of high rank tree. | 
|  | 69 | if (BB1G->Rank < BB2G->Rank) | 
|  | 70 | BB1G->Group = BB2G; | 
|  | 71 | else { | 
|  | 72 | BB2G->Group = BB1G; | 
|  | 73 | // If the ranks are the same, increment root of one tree by one. | 
|  | 74 | if (BB1G->Rank == BB2G->Rank) | 
|  | 75 | BB1G->Rank++; | 
|  | 76 | } | 
|  | 77 | return true; | 
|  | 78 | } | 
|  | 79 |  | 
|  | 80 | // Give BB, return the auxiliary information. | 
|  | 81 | BBInfo &getBBInfo(const BasicBlock *BB) const { | 
|  | 82 | auto It = BBInfos.find(BB); | 
|  | 83 | assert(It->second.get() != nullptr); | 
|  | 84 | return *It->second.get(); | 
|  | 85 | } | 
|  | 86 |  | 
| Rong Xu | a5b5745 | 2016-12-02 19:10:29 +0000 | [diff] [blame] | 87 | // Give BB, return the auxiliary information if it's available. | 
|  | 88 | BBInfo *findBBInfo(const BasicBlock *BB) const { | 
|  | 89 | auto It = BBInfos.find(BB); | 
|  | 90 | if (It == BBInfos.end()) | 
|  | 91 | return nullptr; | 
|  | 92 | return It->second.get(); | 
|  | 93 | } | 
|  | 94 |  | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 95 | // Traverse the CFG using a stack. Find all the edges and assign the weight. | 
|  | 96 | // Edges with large weight will be put into MST first so they are less likely | 
|  | 97 | // to be instrumented. | 
|  | 98 | void buildEdges() { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 99 | LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 100 |  | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 101 | const BasicBlock *Entry = &(F.getEntryBlock()); | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 102 | uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 103 | Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, | 
|  | 104 | *ExitOutgoing = nullptr, *ExitIncoming = nullptr; | 
|  | 105 | uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; | 
|  | 106 |  | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 107 | // Add a fake edge to the entry. | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 108 | EntryIncoming = &addEdge(nullptr, Entry, EntryWeight); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 109 | LLVM_DEBUG(dbgs() << "  Edge: from fake node to " << Entry->getName() | 
|  | 110 | << " w = " << EntryWeight << "\n"); | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 111 |  | 
|  | 112 | // Special handling for single BB functions. | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 113 | if (succ_empty(Entry)) { | 
|  | 114 | addEdge(Entry, nullptr, EntryWeight); | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 115 | return; | 
|  | 116 | } | 
|  | 117 |  | 
|  | 118 | static const uint32_t CriticalEdgeMultiplier = 1000; | 
|  | 119 |  | 
|  | 120 | for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { | 
| Chandler Carruth | edb12a8 | 2018-10-15 10:04:59 +0000 | [diff] [blame] | 121 | Instruction *TI = BB->getTerminator(); | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 122 | uint64_t BBWeight = | 
|  | 123 | (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2); | 
|  | 124 | uint64_t Weight = 2; | 
|  | 125 | if (int successors = TI->getNumSuccessors()) { | 
|  | 126 | for (int i = 0; i != successors; ++i) { | 
|  | 127 | BasicBlock *TargetBB = TI->getSuccessor(i); | 
|  | 128 | bool Critical = isCriticalEdge(TI, i); | 
|  | 129 | uint64_t scaleFactor = BBWeight; | 
|  | 130 | if (Critical) { | 
|  | 131 | if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) | 
|  | 132 | scaleFactor *= CriticalEdgeMultiplier; | 
|  | 133 | else | 
|  | 134 | scaleFactor = UINT64_MAX; | 
|  | 135 | } | 
|  | 136 | if (BPI != nullptr) | 
|  | 137 | Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 138 | auto *E = &addEdge(&*BB, TargetBB, Weight); | 
|  | 139 | E->IsCritical = Critical; | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 140 | LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to " | 
|  | 141 | << TargetBB->getName() << "  w=" << Weight << "\n"); | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 142 |  | 
|  | 143 | // Keep track of entry/exit edges: | 
|  | 144 | if (&*BB == Entry) { | 
|  | 145 | if (Weight > MaxEntryOutWeight) { | 
|  | 146 | MaxEntryOutWeight = Weight; | 
|  | 147 | EntryOutgoing = E; | 
|  | 148 | } | 
|  | 149 | } | 
|  | 150 |  | 
|  | 151 | auto *TargetTI = TargetBB->getTerminator(); | 
|  | 152 | if (TargetTI && !TargetTI->getNumSuccessors()) { | 
|  | 153 | if (Weight > MaxExitInWeight) { | 
|  | 154 | MaxExitInWeight = Weight; | 
|  | 155 | ExitIncoming = E; | 
|  | 156 | } | 
|  | 157 | } | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 158 | } | 
|  | 159 | } else { | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 160 | ExitBlockFound = true; | 
|  | 161 | Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight); | 
|  | 162 | if (BBWeight > MaxExitOutWeight) { | 
|  | 163 | MaxExitOutWeight = BBWeight; | 
|  | 164 | ExitOutgoing = ExitO; | 
|  | 165 | } | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 166 | LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to fake exit" | 
|  | 167 | << " w = " << BBWeight << "\n"); | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 168 | } | 
|  | 169 | } | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 170 |  | 
|  | 171 | // Entry/exit edge adjustment heurisitic: | 
|  | 172 | // prefer instrumenting entry edge over exit edge | 
|  | 173 | // if possible. Those exit edges may never have a chance to be | 
|  | 174 | // executed (for instance the program is an event handling loop) | 
|  | 175 | // before the profile is asynchronously dumped. | 
|  | 176 | // | 
|  | 177 | // If EntryIncoming and ExitOutgoing has similar weight, make sure | 
|  | 178 | // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing | 
|  | 179 | // and ExitIncoming has similar weight, make sure ExitIncoming becomes | 
|  | 180 | // the min-edge. | 
|  | 181 | uint64_t EntryInWeight = EntryWeight; | 
|  | 182 |  | 
|  | 183 | if (EntryInWeight >= MaxExitOutWeight && | 
|  | 184 | EntryInWeight * 2 < MaxExitOutWeight * 3) { | 
|  | 185 | EntryIncoming->Weight = MaxExitOutWeight; | 
|  | 186 | ExitOutgoing->Weight = EntryInWeight + 1; | 
|  | 187 | } | 
|  | 188 |  | 
|  | 189 | if (MaxEntryOutWeight >= MaxExitInWeight && | 
|  | 190 | MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { | 
|  | 191 | EntryOutgoing->Weight = MaxExitInWeight; | 
|  | 192 | ExitIncoming->Weight = MaxEntryOutWeight + 1; | 
|  | 193 | } | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 194 | } | 
|  | 195 |  | 
|  | 196 | // Sort CFG edges based on its weight. | 
|  | 197 | void sortEdgesByWeight() { | 
|  | 198 | std::stable_sort(AllEdges.begin(), AllEdges.end(), | 
|  | 199 | [](const std::unique_ptr<Edge> &Edge1, | 
|  | 200 | const std::unique_ptr<Edge> &Edge2) { | 
|  | 201 | return Edge1->Weight > Edge2->Weight; | 
|  | 202 | }); | 
|  | 203 | } | 
|  | 204 |  | 
|  | 205 | // Traverse all the edges and compute the Minimum Weight Spanning Tree | 
|  | 206 | // using union-find algorithm. | 
|  | 207 | void computeMinimumSpanningTree() { | 
|  | 208 | // First, put all the critical edge with landing-pad as the Dest to MST. | 
|  | 209 | // This works around the insufficient support of critical edges split | 
|  | 210 | // when destination BB is a landing pad. | 
|  | 211 | for (auto &Ei : AllEdges) { | 
|  | 212 | if (Ei->Removed) | 
|  | 213 | continue; | 
|  | 214 | if (Ei->IsCritical) { | 
|  | 215 | if (Ei->DestBB && Ei->DestBB->isLandingPad()) { | 
|  | 216 | if (unionGroups(Ei->SrcBB, Ei->DestBB)) | 
|  | 217 | Ei->InMST = true; | 
|  | 218 | } | 
|  | 219 | } | 
|  | 220 | } | 
|  | 221 |  | 
|  | 222 | for (auto &Ei : AllEdges) { | 
|  | 223 | if (Ei->Removed) | 
|  | 224 | continue; | 
| Xinliang David Li | 19fb5b4 | 2017-12-18 17:56:19 +0000 | [diff] [blame] | 225 | // If we detect infinite loops, force | 
|  | 226 | // instrumenting the entry edge: | 
|  | 227 | if (!ExitBlockFound && Ei->SrcBB == nullptr) | 
|  | 228 | continue; | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 229 | if (unionGroups(Ei->SrcBB, Ei->DestBB)) | 
|  | 230 | Ei->InMST = true; | 
|  | 231 | } | 
|  | 232 | } | 
|  | 233 |  | 
|  | 234 | // Dump the Debug information about the instrumentation. | 
|  | 235 | void dumpEdges(raw_ostream &OS, const Twine &Message) const { | 
|  | 236 | if (!Message.str().empty()) | 
|  | 237 | OS << Message << "\n"; | 
|  | 238 | OS << "  Number of Basic Blocks: " << BBInfos.size() << "\n"; | 
|  | 239 | for (auto &BI : BBInfos) { | 
|  | 240 | const BasicBlock *BB = BI.first; | 
|  | 241 | OS << "  BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << "  " | 
|  | 242 | << BI.second->infoString() << "\n"; | 
|  | 243 | } | 
|  | 244 |  | 
|  | 245 | OS << "  Number of Edges: " << AllEdges.size() | 
|  | 246 | << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; | 
|  | 247 | uint32_t Count = 0; | 
|  | 248 | for (auto &EI : AllEdges) | 
|  | 249 | OS << "  Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" | 
|  | 250 | << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; | 
|  | 251 | } | 
|  | 252 |  | 
|  | 253 | // Add an edge to AllEdges with weight W. | 
|  | 254 | Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { | 
|  | 255 | uint32_t Index = BBInfos.size(); | 
|  | 256 | auto Iter = BBInfos.end(); | 
|  | 257 | bool Inserted; | 
|  | 258 | std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); | 
|  | 259 | if (Inserted) { | 
|  | 260 | // Newly inserted, update the real info. | 
|  | 261 | Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); | 
|  | 262 | Index++; | 
|  | 263 | } | 
|  | 264 | std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); | 
|  | 265 | if (Inserted) | 
|  | 266 | // Newly inserted, update the real info. | 
|  | 267 | Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); | 
|  | 268 | AllEdges.emplace_back(new Edge(Src, Dest, W)); | 
|  | 269 | return *AllEdges.back(); | 
|  | 270 | } | 
|  | 271 |  | 
| Xinliang David Li | d91057b | 2017-12-08 19:38:07 +0000 | [diff] [blame] | 272 | BranchProbabilityInfo *BPI; | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 273 | BlockFrequencyInfo *BFI; | 
|  | 274 |  | 
|  | 275 | public: | 
| Xinliang David Li | d91057b | 2017-12-08 19:38:07 +0000 | [diff] [blame] | 276 | CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, | 
|  | 277 | BlockFrequencyInfo *BFI_ = nullptr) | 
|  | 278 | : F(Func), BPI(BPI_), BFI(BFI_) { | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 279 | buildEdges(); | 
|  | 280 | sortEdgesByWeight(); | 
|  | 281 | computeMinimumSpanningTree(); | 
|  | 282 | } | 
|  | 283 | }; | 
|  | 284 |  | 
| Rong Xu | f430ae4 | 2015-12-09 18:08:16 +0000 | [diff] [blame] | 285 | } // end namespace llvm | 
| Sam Clegg | fd5ab25 | 2017-07-12 20:49:21 +0000 | [diff] [blame] | 286 |  | 
|  | 287 | #undef DEBUG_TYPE // "cfgmst" | 
|  | 288 |  | 
|  | 289 | #endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H |