Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 1 | //===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file was developed by Ted Kremenek and is distributed under |
| 6 | // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file defines the CFG and CFGBuilder classes for representing and |
| 11 | // building Control-Flow Graphs (CFGs) from ASTs. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "clang/AST/CFG.h" |
| 16 | #include "clang/AST/Expr.h" |
Ted Kremenek | 95e854d | 2007-08-21 22:06:14 +0000 | [diff] [blame] | 17 | #include "clang/AST/StmtVisitor.h" |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 18 | #include "llvm/ADT/DenseMap.h" |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 19 | #include <iostream> |
| 20 | #include <iomanip> |
| 21 | #include <algorithm> |
| 22 | using namespace clang; |
| 23 | |
| 24 | namespace { |
| 25 | |
| 26 | // SaveAndRestore - A utility class that uses RIIA to save and restore |
| 27 | // the value of a variable. |
| 28 | template<typename T> |
| 29 | struct SaveAndRestore { |
| 30 | SaveAndRestore(T& x) : X(x), old_value(x) {} |
| 31 | ~SaveAndRestore() { X = old_value; } |
| 32 | |
| 33 | T& X; |
| 34 | T old_value; |
| 35 | }; |
| 36 | } |
| 37 | |
| 38 | /// CFGBuilder - This class is implements CFG construction from an AST. |
| 39 | /// The builder is stateful: an instance of the builder should be used to only |
| 40 | /// construct a single CFG. |
| 41 | /// |
| 42 | /// Example usage: |
| 43 | /// |
| 44 | /// CFGBuilder builder; |
| 45 | /// CFG* cfg = builder.BuildAST(stmt1); |
| 46 | /// |
Ted Kremenek | 95e854d | 2007-08-21 22:06:14 +0000 | [diff] [blame] | 47 | /// CFG construction is done via a recursive walk of an AST. |
| 48 | /// We actually parse the AST in reverse order so that the successor |
| 49 | /// of a basic block is constructed prior to its predecessor. This |
| 50 | /// allows us to nicely capture implicit fall-throughs without extra |
| 51 | /// basic blocks. |
| 52 | /// |
| 53 | class CFGBuilder : public StmtVisitor<CFGBuilder,CFGBlock*> { |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 54 | CFG* cfg; |
| 55 | CFGBlock* Block; |
| 56 | CFGBlock* Exit; |
| 57 | CFGBlock* Succ; |
| 58 | unsigned NumBlocks; |
| 59 | |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 60 | typedef llvm::DenseMap<LabelStmt*,CFGBlock*> LabelMapTy; |
| 61 | LabelMapTy LabelMap; |
| 62 | |
| 63 | typedef std::list<CFGBlock*> BackpatchBlocksTy; |
| 64 | BackpatchBlocksTy BackpatchBlocks; |
| 65 | |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 66 | public: |
| 67 | explicit CFGBuilder() : cfg(NULL), Block(NULL), Exit(NULL), Succ(NULL), |
| 68 | NumBlocks(0) { |
| 69 | // Create an empty CFG. |
| 70 | cfg = new CFG(); |
| 71 | } |
| 72 | |
| 73 | ~CFGBuilder() { delete cfg; } |
| 74 | |
| 75 | /// buildCFG - Constructs a CFG from an AST (a Stmt*). The AST can |
| 76 | /// represent an arbitrary statement. Examples include a single expression |
| 77 | /// or a function body (compound statement). The ownership of the returned |
| 78 | /// CFG is transferred to the caller. If CFG construction fails, this method |
| 79 | /// returns NULL. |
| 80 | CFG* buildCFG(Stmt* Statement) { |
| 81 | if (!Statement) return NULL; |
| 82 | |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 83 | assert (!Exit && "CFGBuilder should only be used to construct one CFG"); |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 84 | |
| 85 | // Create the exit block. |
| 86 | Block = createBlock(); |
| 87 | Exit = Block; |
| 88 | |
| 89 | // Visit the statements and create the CFG. |
Ted Kremenek | 95e854d | 2007-08-21 22:06:14 +0000 | [diff] [blame] | 90 | if (CFGBlock* B = Visit(Statement)) { |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 91 | // Reverse the statements in the last constructed block. Statements |
| 92 | // are inserted into the blocks in reverse order. |
| 93 | B->reverseStmts(); |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 94 | |
| 95 | // Backpatch the gotos whose label -> block mappings we didn't know |
| 96 | // when we encountered them. |
| 97 | for (BackpatchBlocksTy::iterator I = BackpatchBlocks.begin(), |
| 98 | E = BackpatchBlocks.end(); I != E; ++I ) { |
| 99 | |
| 100 | CFGBlock* B = *I; |
| 101 | GotoStmt* G = cast<GotoStmt>(B->getTerminator()); |
| 102 | LabelMapTy::iterator LI = LabelMap.find(G->getLabel()); |
| 103 | |
| 104 | if (LI == LabelMap.end()) |
| 105 | return NULL; // No matching label. Bad CFG. |
| 106 | |
| 107 | B->addSuccessor(LI->second); |
| 108 | } |
| 109 | |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 110 | // NULL out cfg so that repeated calls |
| 111 | CFG* t = cfg; |
| 112 | cfg = NULL; |
| 113 | return t; |
| 114 | } |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 115 | else return NULL; |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 116 | } |
Ted Kremenek | 95e854d | 2007-08-21 22:06:14 +0000 | [diff] [blame] | 117 | |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 118 | // createBlock - Used to lazily create blocks that are connected |
| 119 | // to the current (global) succcessor. |
| 120 | CFGBlock* createBlock( bool add_successor = true ) { |
| 121 | CFGBlock* B = cfg->createBlock(NumBlocks++); |
| 122 | if (add_successor && Succ) B->addSuccessor(Succ); |
| 123 | return B; |
| 124 | } |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 125 | |
Ted Kremenek | 95e854d | 2007-08-21 22:06:14 +0000 | [diff] [blame] | 126 | /// Here we handle statements with no branching control flow. |
| 127 | CFGBlock* VisitStmt(Stmt* Statement) { |
| 128 | // We cannot assume that we are in the middle of a basic block, since |
| 129 | // the CFG might only be constructed for this single statement. If |
| 130 | // we have no current basic block, just create one lazily. |
| 131 | if (!Block) Block = createBlock(); |
| 132 | |
| 133 | // Simply add the statement to the current block. We actually |
| 134 | // insert statements in reverse order; this order is reversed later |
| 135 | // when processing the containing element in the AST. |
| 136 | Block->appendStmt(Statement); |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 137 | |
| 138 | return Block; |
| 139 | } |
| 140 | |
Ted Kremenek | 95e854d | 2007-08-21 22:06:14 +0000 | [diff] [blame] | 141 | CFGBlock* VisitCompoundStmt(CompoundStmt* C) { |
| 142 | // The value returned from this function is the last created CFGBlock |
| 143 | // that represents the "entry" point for the translated AST node. |
| 144 | for (CompoundStmt::reverse_body_iterator I = C->body_rbegin(), |
| 145 | E = C->body_rend(); I != E; ++I ) |
| 146 | // Add the statement to the current block. |
| 147 | if (!Visit(*I)) return NULL; |
| 148 | |
| 149 | return Block; |
| 150 | } |
| 151 | |
| 152 | CFGBlock* VisitIfStmt(IfStmt* I) { |
| 153 | |
| 154 | // We may see an if statement in the middle of a basic block, or |
| 155 | // it may be the first statement we are processing. In either case, |
| 156 | // we create a new basic block. First, we create the blocks for |
| 157 | // the then...else statements, and then we create the block containing |
| 158 | // the if statement. If we were in the middle of a block, we |
| 159 | // stop processing that block and reverse its statements. That block |
| 160 | // is then the implicit successor for the "then" and "else" clauses. |
| 161 | |
| 162 | // The block we were proccessing is now finished. Make it the |
| 163 | // successor block. |
| 164 | if (Block) { |
| 165 | Succ = Block; |
| 166 | Block->reverseStmts(); |
| 167 | } |
| 168 | |
| 169 | // Process the false branch. NULL out Block so that the recursive |
| 170 | // call to Visit will create a new basic block. |
| 171 | // Null out Block so that all successor |
| 172 | CFGBlock* ElseBlock = Succ; |
| 173 | |
| 174 | if (Stmt* Else = I->getElse()) { |
| 175 | SaveAndRestore<CFGBlock*> sv(Succ); |
| 176 | |
| 177 | // NULL out Block so that the recursive call to Visit will |
| 178 | // create a new basic block. |
| 179 | Block = NULL; |
| 180 | ElseBlock = Visit(Else); |
| 181 | if (!ElseBlock) return NULL; |
| 182 | ElseBlock->reverseStmts(); |
| 183 | } |
| 184 | |
| 185 | // Process the true branch. NULL out Block so that the recursive |
| 186 | // call to Visit will create a new basic block. |
| 187 | // Null out Block so that all successor |
| 188 | CFGBlock* ThenBlock; |
| 189 | { |
| 190 | Stmt* Then = I->getThen(); |
| 191 | assert (Then); |
| 192 | SaveAndRestore<CFGBlock*> sv(Succ); |
| 193 | Block = NULL; |
| 194 | ThenBlock = Visit(Then); |
| 195 | if (!ThenBlock) return NULL; |
| 196 | ThenBlock->reverseStmts(); |
| 197 | } |
| 198 | |
| 199 | // Now create a new block containing the if statement. |
| 200 | Block = createBlock(false); |
| 201 | |
| 202 | // Add the condition as the last statement in the new block. |
| 203 | Block->appendStmt(I->getCond()); |
| 204 | |
| 205 | // Set the terminator of the new block to the If statement. |
| 206 | Block->setTerminator(I); |
| 207 | |
| 208 | // Now add the successors. |
| 209 | Block->addSuccessor(ThenBlock); |
| 210 | Block->addSuccessor(ElseBlock); |
| 211 | |
| 212 | return Block; |
| 213 | } |
| 214 | |
| 215 | CFGBlock* VisitReturnStmt(ReturnStmt* R) { |
| 216 | // If we were in the middle of a block we stop processing that block |
| 217 | // and reverse its statements. |
| 218 | // |
| 219 | // NOTE: If a "return" appears in the middle of a block, this means |
| 220 | // that the code afterwards is DEAD (unreachable). We still |
| 221 | // keep a basic block for that code; a simple "mark-and-sweep" |
| 222 | // from the entry block will be able to report such dead |
| 223 | // blocks. |
| 224 | if (Block) Block->reverseStmts(); |
| 225 | |
| 226 | // Create the new block. |
| 227 | Block = createBlock(false); |
| 228 | |
| 229 | // The Exit block is the only successor. |
| 230 | Block->addSuccessor(Exit); |
| 231 | |
| 232 | // Add the return expression to the block. |
| 233 | Block->appendStmt(R); |
| 234 | |
| 235 | // Add the return statement itself to the block. |
| 236 | if (R->getRetValue()) Block->appendStmt(R->getRetValue()); |
| 237 | |
| 238 | return Block; |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 239 | } |
| 240 | |
| 241 | CFGBlock* VisitLabelStmt(LabelStmt* L) { |
| 242 | // Get the block of the labeled statement. Add it to our map. |
| 243 | CFGBlock* LabelBlock = Visit(L->getSubStmt()); |
| 244 | |
| 245 | |
| 246 | assert (LabelMap.find(L) == LabelMap.end() && "label already in map"); |
| 247 | LabelMap[ L ] = LabelBlock; |
| 248 | |
| 249 | // Labels partition blocks, so this is the end of the basic block |
| 250 | // we were processing (the label is the first statement). |
| 251 | assert (Block); |
| 252 | Block->appendStmt(L); |
| 253 | Block->reverseStmts(); |
| 254 | |
| 255 | // We set Block to NULL to allow lazy creation of a new block |
| 256 | // (if necessary); |
| 257 | Block = NULL; |
| 258 | |
| 259 | // This block is now the implicit successor of other blocks. |
| 260 | Succ = LabelBlock; |
| 261 | |
| 262 | return LabelBlock; |
| 263 | } |
| 264 | |
| 265 | CFGBlock* VisitGotoStmt(GotoStmt* G) { |
| 266 | // Goto is a control-flow statement. Thus we stop processing the |
| 267 | // current block and create a new one. |
| 268 | if (Block) Block->reverseStmts(); |
| 269 | Block = createBlock(false); |
| 270 | Block->setTerminator(G); |
| 271 | |
| 272 | // If we already know the mapping to the label block add the |
| 273 | // successor now. |
| 274 | LabelMapTy::iterator I = LabelMap.find(G->getLabel()); |
| 275 | |
| 276 | if (I == LabelMap.end()) |
| 277 | // We will need to backpatch this block later. |
| 278 | BackpatchBlocks.push_back(Block); |
| 279 | else |
| 280 | Block->addSuccessor(I->second); |
| 281 | |
| 282 | return Block; |
| 283 | } |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 284 | }; |
| 285 | |
| 286 | // BuildCFG - A helper function that builds CFGs from ASTS. |
Ted Kremenek | 95e854d | 2007-08-21 22:06:14 +0000 | [diff] [blame] | 287 | CFG* CFG::BuildCFG(Stmt* Statement) { |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 288 | CFGBuilder Builder; |
| 289 | return Builder.buildCFG(Statement); |
| 290 | } |
| 291 | |
| 292 | // reverseStmts - A method that reverses the order of the statements within |
| 293 | // a CFGBlock. |
| 294 | void CFGBlock::reverseStmts() { std::reverse(Stmts.begin(),Stmts.end()); } |
| 295 | |
| 296 | // dump - A simple pretty printer of a CFG that outputs to stderr. |
| 297 | void CFG::dump() { print(std::cerr); } |
| 298 | |
| 299 | // print - A simple pretty printer of a CFG that outputs to an ostream. |
| 300 | void CFG::print(std::ostream& OS) { |
| 301 | // Iterate through the CFGBlocks and print them one by one. Specially |
| 302 | // designate the Entry and Exit blocks. |
| 303 | for (iterator I = Blocks.begin(), E = Blocks.end() ; I != E ; ++I) { |
| 304 | OS << "\n [ B" << I->getBlockID(); |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 305 | if (&(*I) == &getExit()) OS << " (EXIT) ]\n"; |
| 306 | else if (&(*I) == &getEntry()) OS << " (ENTRY) ]\n"; |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 307 | else OS << " ]\n"; |
| 308 | I->print(OS); |
| 309 | } |
| 310 | OS << "\n"; |
| 311 | } |
| 312 | |
| 313 | // dump - A simply pretty printer of a CFGBlock that outputs to stderr. |
| 314 | void CFGBlock::dump() { print(std::cerr); } |
| 315 | |
| 316 | // print - A simple pretty printer of a CFGBlock that outputs to an ostream. |
| 317 | // Generally this will only be called from CFG::print. |
| 318 | void CFGBlock::print(std::ostream& OS) { |
| 319 | |
| 320 | // Iterate through the statements in the block and print them. |
| 321 | OS << " ------------------------\n"; |
| 322 | unsigned j = 1; |
| 323 | for (iterator I = Stmts.begin(), E = Stmts.end() ; I != E ; ++I, ++j ) { |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 324 | // Print the statement # in the basic block. |
| 325 | OS << " " << std::setw(3) << j << ": "; |
| 326 | |
| 327 | // Print the statement/expression. |
| 328 | Stmt* S = *I; |
| 329 | |
| 330 | if (LabelStmt* L = dyn_cast<LabelStmt>(S)) |
| 331 | OS << L->getName() << ": (LABEL)\n"; |
| 332 | else |
| 333 | (*I)->printPretty(OS); |
| 334 | |
| 335 | // Expressions need a newline. |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 336 | if (isa<Expr>(*I)) OS << '\n'; |
| 337 | } |
| 338 | OS << " ------------------------\n"; |
| 339 | |
| 340 | // Print the predecessors of this block. |
| 341 | OS << " Predecessors (" << pred_size() << "):"; |
| 342 | unsigned i = 0; |
| 343 | for (pred_iterator I = pred_begin(), E = pred_end(); I != E; ++I, ++i ) { |
| 344 | if (i == 8 || (i-8) == 0) { |
| 345 | OS << "\n "; |
| 346 | } |
| 347 | OS << " B" << (*I)->getBlockID(); |
| 348 | } |
| 349 | |
| 350 | // Print the terminator of this block. |
| 351 | OS << "\n Terminator: "; |
| 352 | if (ControlFlowStmt) { |
| 353 | switch (ControlFlowStmt->getStmtClass()) { |
| 354 | case Stmt::IfStmtClass: { |
| 355 | IfStmt* I = cast<IfStmt>(ControlFlowStmt); |
| 356 | OS << "if "; |
| 357 | I->getCond()->printPretty(std::cerr); |
| 358 | OS << "\n"; |
| 359 | break; |
| 360 | } |
| 361 | |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 362 | default: |
Ted Kremenek | c5de222 | 2007-08-21 23:26:17 +0000 | [diff] [blame] | 363 | ControlFlowStmt->printPretty(std::cerr); |
| 364 | break; |
Ted Kremenek | 97f7531 | 2007-08-21 21:42:03 +0000 | [diff] [blame] | 365 | } |
| 366 | } |
| 367 | else OS << "<NULL>\n"; |
| 368 | |
| 369 | // Print the successors of this block. |
| 370 | OS << " Successors (" << succ_size() << "):"; |
| 371 | i = 0; |
| 372 | for (succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I, ++i ) { |
| 373 | if (i == 8 || (i-8) % 10 == 0) { |
| 374 | OS << "\n "; |
| 375 | } |
| 376 | OS << " B" << (*I)->getBlockID(); |
| 377 | } |
| 378 | OS << '\n'; |
| 379 | } |