Added CFG infrastructure (CFG.cpp and CFG.h) for clang ASTs.
Added builder code to translate ASTs to CFGs. This currently supports
if, return, and non-control flow statements.
Added pretty-printer to debug CFGs.
Added a "-dump-cfg" option to the clang driver to dump CFGs for code
sent through the frontend.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@41252 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/AST/CFG.cpp b/AST/CFG.cpp
new file mode 100644
index 0000000..73d2408
--- /dev/null
+++ b/AST/CFG.cpp
@@ -0,0 +1,351 @@
+//===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Ted Kremenek and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CFG and CFGBuilder classes for representing and
+// building Control-Flow Graphs (CFGs) from ASTs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CFG.h"
+#include "clang/AST/Expr.h"
+#include <iostream>
+#include <iomanip>
+#include <algorithm>
+using namespace clang;
+
+namespace {
+
+ // SaveAndRestore - A utility class that uses RIIA to save and restore
+ // the value of a variable.
+ template<typename T>
+ struct SaveAndRestore {
+ SaveAndRestore(T& x) : X(x), old_value(x) {}
+ ~SaveAndRestore() { X = old_value; }
+
+ T& X;
+ T old_value;
+ };
+}
+
+/// CFGBuilder - This class is implements CFG construction from an AST.
+/// The builder is stateful: an instance of the builder should be used to only
+/// construct a single CFG.
+///
+/// Example usage:
+///
+/// CFGBuilder builder;
+/// CFG* cfg = builder.BuildAST(stmt1);
+///
+class CFGBuilder {
+ CFG* cfg;
+ CFGBlock* Block;
+ CFGBlock* Exit;
+ CFGBlock* Succ;
+ unsigned NumBlocks;
+
+public:
+ explicit CFGBuilder() : cfg(NULL), Block(NULL), Exit(NULL), Succ(NULL),
+ NumBlocks(0) {
+ // Create an empty CFG.
+ cfg = new CFG();
+ }
+
+ ~CFGBuilder() { delete cfg; }
+
+ /// buildCFG - Constructs a CFG from an AST (a Stmt*). The AST can
+ /// represent an arbitrary statement. Examples include a single expression
+ /// or a function body (compound statement). The ownership of the returned
+ /// CFG is transferred to the caller. If CFG construction fails, this method
+ /// returns NULL.
+ CFG* buildCFG(Stmt* Statement) {
+ if (!Statement) return NULL;
+
+ assert (cfg && "CFGBuilder should only be used to construct one CFG");
+
+ // Create the exit block.
+ Block = createBlock();
+ Exit = Block;
+
+ // Visit the statements and create the CFG.
+ if (CFGBlock* B = visitStmt(Statement)) {
+ // Reverse the statements in the last constructed block. Statements
+ // are inserted into the blocks in reverse order.
+ B->reverseStmts();
+ // NULL out cfg so that repeated calls
+ CFG* t = cfg;
+ cfg = NULL;
+ return t;
+ }
+ else {
+ // Error occured while building CFG: Delete the partially constructed CFG.
+ delete cfg;
+ cfg = NULL;
+ return NULL;
+ }
+ }
+
+private:
+
+ // createBlock - Used to lazily create blocks that are connected
+ // to the current (global) succcessor.
+ CFGBlock* createBlock( bool add_successor = true ) {
+ CFGBlock* B = cfg->createBlock(NumBlocks++);
+ if (add_successor && Succ) B->addSuccessor(Succ);
+ return B;
+ }
+
+ // visitStmt - CFG construction is done via a recursive walk of an AST.
+ // We actually parse the AST in reverse order so that the successor
+ // of a basic block is constructed prior to its predecessor. This
+ // allows us to nicely capture implicit fall-throughs without extra
+ // basic blocks.
+ //
+ // The value returned from this function is the last created CFGBlock
+ // that represents the "entry" point for the translated AST node.
+ CFGBlock* visitStmt(Stmt* Statement) {
+ assert (Statement && "visitStmt does not accept NULL Stmt*");
+
+ switch (Statement->getStmtClass()) {
+ default:
+ assert (false && "statement case for CFGBuilder not yet implemented");
+ return NULL;
+
+ // Statements with no branching control flow.
+ case Stmt::NullStmtClass:
+ case Stmt::DeclStmtClass:
+ case Stmt::PreDefinedExprClass:
+ case Stmt::DeclRefExprClass:
+ case Stmt::IntegerLiteralClass:
+ case Stmt::FloatingLiteralClass:
+ case Stmt::StringLiteralClass:
+ case Stmt::CharacterLiteralClass:
+ case Stmt::ParenExprClass:
+ case Stmt::UnaryOperatorClass:
+ case Stmt::SizeOfAlignOfTypeExprClass:
+ case Stmt::ArraySubscriptExprClass:
+ case Stmt::CallExprClass:
+ case Stmt::BinaryOperatorClass:
+ case Stmt::ImplicitCastExprClass:
+ case Stmt::CompoundLiteralExprClass:
+ case Stmt::OCUVectorElementExprClass:
+ // We cannot assume that we are in the middle of a basic block, since
+ // the CFG might only be constructed for this single statement. If
+ // we have no current basic block, just create one lazily.
+ if (!Block) Block = createBlock();
+
+ // Simply add the statement to the current block. We actually
+ // insert statements in reverse order; this order is reversed later
+ // when processing the containing element in the AST.
+ Block->appendStmt(Statement);
+ break;
+
+ case Stmt::CompoundStmtClass: {
+ // Iterate through the statements of the compound statement in reverse
+ // order. Because this statement may contain statements that have
+ // complicated control flow, the value of "Block" may change at any
+ // time. This means that statements in the compound statement will
+ // automatically be distributed across multiple basic blocks when
+ // necessary.
+ CompoundStmt* C = cast<CompoundStmt>(Statement);
+
+ for (CompoundStmt::reverse_body_iterator I = C->body_rbegin(),
+ E = C->body_rend(); I != E; ++I )
+ // Add the statement to the current block.
+ if (!visitStmt(*I)) return NULL;
+
+ break;
+ }
+
+ case Stmt::IfStmtClass: {
+ IfStmt* I = cast<IfStmt>(Statement);
+
+ // We may see an if statement in the middle of a basic block, or
+ // it may be the first statement we are processing. In either case,
+ // we create a new basic block. First, we create the blocks for
+ // the then...else statements, and then we create the block containing
+ // the if statement. If we were in the middle of a block, we
+ // stop processing that block and reverse its statements. That block
+ // is then the implicit successor for the "then" and "else" clauses.
+
+ // The block we were proccessing is now finished. Make it the
+ // successor block.
+ if (Block) {
+ Succ = Block;
+ Block->reverseStmts();
+ }
+
+ // Process the false branch. NULL out Block so that the recursive
+ // call to visitStmt will create a new basic block.
+ // Null out Block so that all successor
+ CFGBlock* ElseBlock = Succ;
+
+ if (Stmt* Else = I->getElse()) {
+ SaveAndRestore<CFGBlock*> sv(Succ);
+
+ // NULL out Block so that the recursive call to visitStmt will
+ // create a new basic block.
+ Block = NULL;
+ ElseBlock = visitStmt(Else);
+ if (!ElseBlock) return NULL;
+ ElseBlock->reverseStmts();
+ }
+
+ // Process the true branch. NULL out Block so that the recursive
+ // call to visitStmt will create a new basic block.
+ // Null out Block so that all successor
+ CFGBlock* ThenBlock;
+ {
+ Stmt* Then = I->getThen();
+ assert (Then);
+ SaveAndRestore<CFGBlock*> sv(Succ);
+ Block = NULL;
+ ThenBlock = visitStmt(Then);
+ if (!ThenBlock) return NULL;
+ ThenBlock->reverseStmts();
+ }
+
+ // Now create a new block containing the if statement.
+ Block = createBlock(false);
+
+ // Add the condition as the last statement in the new block.
+ Block->appendStmt(I->getCond());
+
+ // Set the terminator of the new block to the If statement.
+ Block->setTerminator(I);
+
+ // Now add the successors.
+ Block->addSuccessor(ThenBlock);
+ Block->addSuccessor(ElseBlock);
+
+ break;
+ }
+
+ case Stmt::ReturnStmtClass: {
+ ReturnStmt* R = cast<ReturnStmt>(Statement);
+
+ // If we were in the middle of a block we stop processing that block
+ // and reverse its statements.
+ //
+ // NOTE: If a "return" appears in the middle of a block, this means
+ // that the code afterwards is DEAD (unreachable). We still
+ // keep a basic block for that code; a simple "mark-and-sweep"
+ // from the entry block will be able to report such dead
+ // blocks.
+ if (Block) Block->reverseStmts();
+
+ // Create the new block.
+ Block = createBlock(false);
+
+ // The Exit block is the only successor.
+ Block->addSuccessor(Exit);
+
+ // Add the return expression to the block.
+ Block->appendStmt(R);
+
+ // Add the return statement itself to the block.
+ if (R->getRetValue()) Block->appendStmt(R->getRetValue());
+
+ break;
+ }
+ } // end dispatch on statement class
+
+ return Block;
+ }
+
+};
+
+// BuildCFG - A helper function that builds CFGs from ASTS.
+CFG* CFG::BuildCFG( Stmt* Statement ) {
+ CFGBuilder Builder;
+ return Builder.buildCFG(Statement);
+}
+
+// reverseStmts - A method that reverses the order of the statements within
+// a CFGBlock.
+void CFGBlock::reverseStmts() { std::reverse(Stmts.begin(),Stmts.end()); }
+
+// dump - A simple pretty printer of a CFG that outputs to stderr.
+void CFG::dump() { print(std::cerr); }
+
+// print - A simple pretty printer of a CFG that outputs to an ostream.
+void CFG::print(std::ostream& OS) {
+ // Iterate through the CFGBlocks and print them one by one. Specially
+ // designate the Entry and Exit blocks.
+ for (iterator I = Blocks.begin(), E = Blocks.end() ; I != E ; ++I) {
+ OS << "\n [ B" << I->getBlockID();
+ if (&(*I) == getExit()) OS << " (EXIT) ]\n";
+ else if (&(*I) == getEntry()) OS << " (ENTRY) ]\n";
+ else OS << " ]\n";
+ I->print(OS);
+ }
+ OS << "\n";
+}
+
+// dump - A simply pretty printer of a CFGBlock that outputs to stderr.
+void CFGBlock::dump() { print(std::cerr); }
+
+// print - A simple pretty printer of a CFGBlock that outputs to an ostream.
+// Generally this will only be called from CFG::print.
+void CFGBlock::print(std::ostream& OS) {
+
+ // Iterate through the statements in the block and print them.
+ OS << " ------------------------\n";
+ unsigned j = 1;
+ for (iterator I = Stmts.begin(), E = Stmts.end() ; I != E ; ++I, ++j ) {
+ OS << " " << std::setw(3) << j << ": ";
+ (*I)->printPretty(OS);
+ if (isa<Expr>(*I)) OS << '\n';
+ }
+ OS << " ------------------------\n";
+
+ // Print the predecessors of this block.
+ OS << " Predecessors (" << pred_size() << "):";
+ unsigned i = 0;
+ for (pred_iterator I = pred_begin(), E = pred_end(); I != E; ++I, ++i ) {
+ if (i == 8 || (i-8) == 0) {
+ OS << "\n ";
+ }
+ OS << " B" << (*I)->getBlockID();
+ }
+
+ // Print the terminator of this block.
+ OS << "\n Terminator: ";
+ if (ControlFlowStmt) {
+ switch (ControlFlowStmt->getStmtClass()) {
+ case Stmt::IfStmtClass: {
+ IfStmt* I = cast<IfStmt>(ControlFlowStmt);
+ OS << "if ";
+ I->getCond()->printPretty(std::cerr);
+ OS << "\n";
+ break;
+ }
+
+ case Stmt::ReturnStmtClass: {
+ ReturnStmt* R = cast<ReturnStmt>(ControlFlowStmt);
+ R->printPretty(std::cerr);
+ break;
+ }
+
+ default:
+ assert(false && "terminator print not fully implemented");
+ }
+ }
+ else OS << "<NULL>\n";
+
+ // Print the successors of this block.
+ OS << " Successors (" << succ_size() << "):";
+ i = 0;
+ for (succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I, ++i ) {
+ if (i == 8 || (i-8) % 10 == 0) {
+ OS << "\n ";
+ }
+ OS << " B" << (*I)->getBlockID();
+ }
+ OS << '\n';
+}
\ No newline at end of file
diff --git a/Driver/ASTStreamers.cpp b/Driver/ASTStreamers.cpp
index 7f67cd4..170c593 100644
--- a/Driver/ASTStreamers.cpp
+++ b/Driver/ASTStreamers.cpp
@@ -13,6 +13,7 @@
#include "ASTStreamers.h"
#include "clang/AST/AST.h"
+#include "clang/AST/CFG.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/ASTStreamer.h"
using namespace clang;
@@ -142,4 +143,30 @@
ASTStreamer_Terminate(Streamer);
}
+void clang::DumpCFGs(Preprocessor &PP, unsigned MainFileID, bool Stats) {
+ ASTContext Context(PP.getTargetInfo(), PP.getIdentifierTable());
+ ASTStreamerTy *Streamer = ASTStreamer_Init(PP, Context, MainFileID);
+
+ while (Decl *D = ASTStreamer_ReadTopLevelDecl(Streamer)) {
+ if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ if (FD->getBody()) {
+ PrintFunctionDeclStart(FD);
+ fprintf(stderr,"\n");
+ if (CFG* C = CFG::BuildCFG(FD->getBody()))
+ C->dump();
+ else
+ fprintf(stderr," Error processing CFG.\n");
+ }
+ }
+ }
+
+ if (Stats) {
+ fprintf(stderr, "\nSTATISTICS:\n");
+ ASTStreamer_PrintStats(Streamer);
+ Context.PrintStats();
+ }
+
+ ASTStreamer_Terminate(Streamer);
+}
+
diff --git a/Driver/ASTStreamers.h b/Driver/ASTStreamers.h
index 93749a4..6e90e2f 100644
--- a/Driver/ASTStreamers.h
+++ b/Driver/ASTStreamers.h
@@ -23,6 +23,7 @@
void BuildASTs(Preprocessor &PP, unsigned MainFileID, bool Stats);
void PrintASTs(Preprocessor &PP, unsigned MainFileID, bool Stats);
void DumpASTs(Preprocessor &PP, unsigned MainFileID, bool Stats);
+void DumpCFGs(Preprocessor &PP, unsigned MainFileID, bool Stats);
} // end clang namespace
diff --git a/Driver/clang.cpp b/Driver/clang.cpp
index 2ee8a93..d284434 100644
--- a/Driver/clang.cpp
+++ b/Driver/clang.cpp
@@ -52,6 +52,7 @@
ParseASTDump, // Parse ASTs and dump them.
ParseASTCheck, // Parse ASTs and check diagnostics.
ParseAST, // Parse ASTs.
+ ParseCFGDump, // Parse ASTS. Build CFGs. Print CFGs.
ParsePrintCallbacks, // Parse and print each callback.
ParseSyntaxOnly, // Parse and perform semantic analysis.
ParseNoop, // Parse with noop callbacks.
@@ -84,6 +85,8 @@
"Run parser, build ASTs, then dump them"),
clEnumValN(ParseASTCheck, "parse-ast-check",
"Run parser, build ASTs, then check diagnostics"),
+ clEnumValN(ParseCFGDump, "dump-cfg",
+ "Run parser, build ASTs, then build and print CFGs."),
clEnumValN(EmitLLVM, "emit-llvm",
"Build ASTs then convert to LLVM, emit .ll file"),
clEnumValEnd));
@@ -825,6 +828,9 @@
case ParseASTDump:
DumpASTs(PP, MainFileID, Stats);
break;
+ case ParseCFGDump:
+ DumpCFGs(PP, MainFileID, Stats);
+ break;
case EmitLLVM:
EmitLLVMFromASTs(PP, MainFileID, Stats);
break;
diff --git a/include/clang/AST/CFG.h b/include/clang/AST/CFG.h
new file mode 100644
index 0000000..5039309
--- /dev/null
+++ b/include/clang/AST/CFG.h
@@ -0,0 +1,196 @@
+//===--- CFG.h - Classes for representing and building CFGs------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Ted Kremenek and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CFG and CFGBuilder classes for representing and
+// building Control-Flow Graphs (CFGs) from ASTs.
+//
+//===----------------------------------------------------------------------===//
+
+#include <list>
+#include <vector>
+#include <iosfwd>
+
+namespace clang {
+
+class Stmt;
+
+/// CFGBlock - Represents a single basic block in a source-level CFG.
+/// It consists of:
+///
+/// (1) A set of statements/expressions (which may contain subexpressions).
+/// (2) A "terminator" statement (not in the set of statements).
+/// (3) A list of successors and predecessors.
+///
+/// Terminator: The terminator represents the type of control-flow that occurs
+/// at the end of the basic block. The terminator is a Stmt* referring to an
+/// AST node that has control-flow: if-statements, breaks, loops, etc.
+/// If the control-flow is conditional, the condition expression will appear
+/// within the set of statements in the block (usually the last statement).
+///
+/// Predecessors: the order in the set of predecessors is arbitrary.
+///
+/// Successors: the order in the set of successors is NOT arbitrary. We
+/// currently have the following orderings based on the terminator:
+///
+/// Terminator Successor Ordering
+/// -----------------------------------------------------
+/// if Then Block; Else Block
+///
+class CFGBlock {
+ typedef std::vector<Stmt*> StatementListTy;
+ /// Stmts - The set of statements in the basic block.
+ StatementListTy Stmts;
+ /// ControlFlowStmt - The terminator for a basic block that
+ /// indicates the type of control-flow that occurs between a block
+ /// and its successors.
+ Stmt* ControlFlowStmt;
+ /// BlockID - A numerical ID assigned to a CFGBlock during construction
+ /// of the CFG.
+ unsigned BlockID;
+
+ /// Predecessors/Successors - Keep track of the predecessor / successor
+ /// CFG blocks.
+ typedef std::vector<CFGBlock*> AdjacentBlocks;
+ AdjacentBlocks Preds;
+ AdjacentBlocks Succs;
+
+public:
+ explicit CFGBlock(unsigned blockid) : ControlFlowStmt(NULL),
+ BlockID(blockid) {}
+ ~CFGBlock() {};
+
+ // Statement iterators
+ typedef StatementListTy::iterator iterator;
+ typedef StatementListTy::const_iterator const_iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+
+ Stmt* front() { return Stmts.front(); }
+ Stmt* back() { return Stmts.back(); }
+
+ iterator begin() { return Stmts.begin(); }
+ iterator end() { return Stmts.end(); }
+ const_iterator begin() const { return Stmts.begin(); }
+ const_iterator end() const { return Stmts.end(); }
+
+ reverse_iterator rbegin() { return Stmts.rbegin(); }
+ reverse_iterator rend() { return Stmts.rend(); }
+ const_reverse_iterator rbegin() const { return Stmts.rbegin(); }
+ const_reverse_iterator rend() const { return Stmts.rend(); }
+
+ unsigned size() const { return Stmts.size(); }
+ bool empty() const { return Stmts.empty(); }
+
+ // CFG iterators
+ typedef AdjacentBlocks::iterator pred_iterator;
+ typedef AdjacentBlocks::const_iterator const_pred_iterator;
+ typedef AdjacentBlocks::reverse_iterator pred_reverse_iterator;
+ typedef AdjacentBlocks::const_reverse_iterator const_pred_reverse_iterator;
+
+ typedef AdjacentBlocks::iterator succ_iterator;
+ typedef AdjacentBlocks::const_iterator const_succ_iterator;
+ typedef AdjacentBlocks::reverse_iterator succ_reverse_iterator;
+ typedef AdjacentBlocks::const_reverse_iterator const_succ_reverse_iterator;
+
+ pred_iterator pred_begin() { return Preds.begin(); }
+ pred_iterator pred_end() { return Preds.end(); }
+ const_pred_iterator pred_begin() const { return Preds.begin(); }
+ const_pred_iterator pred_end() const { return Preds.end(); }
+
+ pred_reverse_iterator pred_rbegin() { return Preds.rbegin(); }
+ pred_reverse_iterator pred_rend() { return Preds.rend(); }
+ const_pred_reverse_iterator pred_rbegin() const { return Preds.rbegin(); }
+ const_pred_reverse_iterator pred_rend() const { return Preds.rend(); }
+
+ succ_iterator succ_begin() { return Succs.begin(); }
+ succ_iterator succ_end() { return Succs.end(); }
+ const_succ_iterator succ_begin() const { return Succs.begin(); }
+ const_succ_iterator succ_end() const { return Succs.end(); }
+
+ succ_reverse_iterator succ_rbegin() { return Succs.rbegin(); }
+ succ_reverse_iterator succ_rend() { return Succs.rend(); }
+ const_succ_reverse_iterator succ_rbegin() const { return Succs.rbegin(); }
+ const_succ_reverse_iterator succ_rend() const { return Succs.rend(); }
+
+ unsigned succ_size() const { return Succs.size(); }
+ bool succ_empty() const { return Succs.empty(); }
+
+ unsigned pred_size() const { return Preds.size(); }
+ bool pred_empty() const { return Preds.empty(); }
+
+ // Manipulation of block contents
+
+ void appendStmt(Stmt* Statement) { Stmts.push_back(Statement); }
+ void setTerminator(Stmt* Statement) { ControlFlowStmt = Statement; }
+ void reverseStmts();
+
+ void addSuccessor(CFGBlock* Block) {
+ Block->Preds.push_back(this);
+ Succs.push_back(Block);
+ }
+
+ unsigned getBlockID() const { return BlockID; }
+
+ void dump();
+ void print(std::ostream& OS);
+};
+
+
+/// CFG - Represents a source-level, intra-procedural CFG that represents the
+/// control-flow of a Stmt. The Stmt can represent an entire function body,
+/// or a single expression. A CFG will always contain one empty block that
+/// represents the Exit point of the CFG. A CFG will also contain a designated
+/// Entry block. The CFG solely represents control-flow; it consists of
+/// CFGBlocks which are simply containers of Stmt*'s in the AST the CFG
+/// was constructed from.
+class CFG {
+ typedef std::list<CFGBlock> CFGBlockListTy;
+ CFGBlockListTy Blocks;
+
+public:
+
+ CFG() {};
+ ~CFG() {};
+
+ CFGBlock* createBlock(unsigned blockID) {
+ Blocks.push_front(CFGBlock(blockID));
+ return front();
+ }
+
+ // Block iterators
+ typedef CFGBlockListTy::iterator iterator;
+ typedef CFGBlockListTy::const_iterator const_iterator;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+ CFGBlock* front() { return &Blocks.front(); }
+ CFGBlock* back() { return &Blocks.back(); }
+
+ iterator begin() { return Blocks.begin(); }
+ iterator end() { return Blocks.end(); }
+ const_iterator begin() const { return Blocks.begin(); }
+ const_iterator end() const { return Blocks.end(); }
+
+ reverse_iterator rbegin() { return Blocks.rbegin(); }
+ reverse_iterator rend() { return Blocks.rend(); }
+ const_reverse_iterator rbegin() const { return Blocks.rbegin(); }
+ const_reverse_iterator rend() const { return Blocks.rend(); }
+
+ CFGBlock* getEntry() { return front(); }
+ CFGBlock* getExit() { return back(); }
+
+ // Utility
+
+ static CFG* BuildCFG(Stmt* AST);
+ void print(std::ostream& OS);
+ void dump();
+
+};
+
+} // end namespace clang
\ No newline at end of file