[analyzer] PCH deserialization optimization.

We should not deserialize unused declarations from the PCH file. Achieve
this by storing the top level declarations during parsing
(HandleTopLevelDecl ASTConsumer callback) and analyzing/building a call
graph only for those.

Tested the patch on a sample ObjC file that uses PCH. With the patch, 
 the analyzes is 17.5% faster and clang consumes 40% less memory.
Got about 10% overall build/analyzes time decrease on a large Objective
C project.

A bit of CallGraph refactoring/cleanup as well..

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@154625 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 01d6c41..96a16c3 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -14,15 +14,58 @@
 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
-#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/StmtVisitor.h"
 
 #include "llvm/Support/GraphWriter.h"
 
 using namespace clang;
 
-/// Determine if a declaration should be included in the graph.
-static bool includeInGraph(const Decl *D) {
+namespace {
+/// A helper class, which walks the AST and locates all the call sites in the
+/// given function body.
+class CGBuilder : public StmtVisitor<CGBuilder> {
+  CallGraph *G;
+  const Decl *FD;
+  CallGraphNode *CallerNode;
+
+public:
+  CGBuilder(CallGraph *g, const Decl *D, CallGraphNode *N)
+    : G(g), FD(D), CallerNode(N) {}
+
+  void VisitStmt(Stmt *S) { VisitChildren(S); }
+
+  void VisitCallExpr(CallExpr *CE) {
+    // TODO: We need to handle ObjC method calls as well.
+    if (FunctionDecl *CalleeDecl = CE->getDirectCallee())
+      if (G->includeInGraph(CalleeDecl)) {
+        CallGraphNode *CalleeNode = G->getOrInsertNode(CalleeDecl);
+        CallerNode->addCallee(CalleeNode, G);
+      }
+  }
+
+  void VisitChildren(Stmt *S) {
+    for (Stmt::child_range I = S->children(); I; ++I)
+      if (*I)
+        static_cast<CGBuilder*>(this)->Visit(*I);
+  }
+};
+
+} // end anonymous namespace
+
+CallGraph::CallGraph() {
+  Root = getOrInsertNode(0);
+}
+
+CallGraph::~CallGraph() {
+  if (!FunctionMap.empty()) {
+    for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+        I != E; ++I)
+      delete I->second;
+    FunctionMap.clear();
+  }
+}
+
+bool CallGraph::includeInGraph(const Decl *D) {
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     // We skip function template definitions, as their semantics is
     // only determined when they are instantiated.
@@ -43,83 +86,15 @@
   return true;
 }
 
-namespace {
-/// A helper class, which walks the AST and locates all the call sites in the
-/// given function body.
-class CGBuilder : public StmtVisitor<CGBuilder> {
-  CallGraph *G;
-  const Decl *FD;
-  CallGraphNode *CallerNode;
-
-public:
-  CGBuilder(CallGraph *g, const Decl *D, CallGraphNode *N)
-    : G(g), FD(D), CallerNode(N) {}
-
-  void VisitStmt(Stmt *S) { VisitChildren(S); }
-
-  void VisitCallExpr(CallExpr *CE) {
-    // TODO: We need to handle ObjC method calls as well.
-    if (FunctionDecl *CalleeDecl = CE->getDirectCallee())
-      if (includeInGraph(CalleeDecl)) {
-        CallGraphNode *CalleeNode = G->getOrInsertFunction(CalleeDecl);
-        CallerNode->addCallee(CalleeNode, G);
-      }
-  }
-
-  void VisitChildren(Stmt *S) {
-    for (Stmt::child_range I = S->children(); I; ++I)
-      if (*I)
-        static_cast<CGBuilder*>(this)->Visit(*I);
-  }
-};
-
-/// A helper class which walks the AST declarations.
-// TODO: We might want to specialize the visitor to shrink the call graph.
-// For example, we might not want to include the inline methods from header
-// files.
-class CGDeclVisitor : public RecursiveASTVisitor<CGDeclVisitor> {
-  CallGraph *CG;
-
-public:
-  CGDeclVisitor(CallGraph * InCG) : CG(InCG) {}
-
-  bool VisitFunctionDecl(FunctionDecl *FD) {
-    // We skip function template definitions, as their semantics is
-    // only determined when they are instantiated.
-    if (includeInGraph(FD))
-      // If this function has external linkage, anything could call it.
-      // Note, we are not precise here. For example, the function could have
-      // its address taken.
-      CG->addToCallGraph(FD, FD->isGlobal());
-    return true;
-  }
-
-  bool VisitObjCMethodDecl(ObjCMethodDecl *MD) {
-    if (includeInGraph(MD))
-      CG->addToCallGraph(MD, true);
-    return true;
-  }
-};
-
-} // end anonymous namespace
-
-CallGraph::CallGraph() {
-  Root = getOrInsertFunction(0);
-}
-
-CallGraph::~CallGraph() {
-  if (!FunctionMap.empty()) {
-    for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
-        I != E; ++I)
-      delete I->second;
-    FunctionMap.clear();
-  }
-}
-
-void CallGraph::addToCallGraph(Decl* D, bool IsGlobal) {
+void CallGraph::addNodeForDecl(Decl* D, bool IsGlobal) {
   assert(D);
-  CallGraphNode *Node = getOrInsertFunction(D);
 
+  // Do nothing if the node already exists.
+  if (FunctionMap.find(D) != FunctionMap.end())
+    return;
+
+  // Allocate a new node, mark it as root, and process it's calls.
+  CallGraphNode *Node = getOrInsertNode(D);
   if (IsGlobal)
     Root->addCallee(Node, this);
 
@@ -129,17 +104,13 @@
     builder.Visit(Body);
 }
 
-void CallGraph::addToCallGraph(TranslationUnitDecl *TU) {
-  CGDeclVisitor(this).TraverseDecl(TU);
-}
-
 CallGraphNode *CallGraph::getNode(const Decl *F) const {
   FunctionMapTy::const_iterator I = FunctionMap.find(F);
   if (I == FunctionMap.end()) return 0;
   return I->second;
 }
 
-CallGraphNode *CallGraph::getOrInsertFunction(Decl *F) {
+CallGraphNode *CallGraph::getOrInsertNode(Decl *F) {
   CallGraphNode *&Node = FunctionMap[F];
   if (Node)
     return Node;
diff --git a/lib/StaticAnalyzer/Core/ExprEngine.cpp b/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 30a511d..d2da9aa 100644
--- a/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -67,7 +67,7 @@
 //===----------------------------------------------------------------------===//
 
 ExprEngine::ExprEngine(AnalysisManager &mgr, bool gcEnabled,
-                       SetOfDecls *VisitedCallees,
+                       SetOfConstDecls *VisitedCallees,
                        FunctionSummariesTy *FS)
   : AMgr(mgr),
     AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()),
diff --git a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index 756ef32..c19ebcb 100644
--- a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Support/Timer.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 
 #include <queue>
@@ -95,6 +96,13 @@
   AnalyzerOptions Opts;
   ArrayRef<std::string> Plugins;
 
+  /// \brief Stores the declarations from the local translation unit.
+  /// Note, we pre-compute the local declarations at parse time as an
+  /// optimization to make sure we do not deserialize everything from disk.
+  /// The local declaration to all declarations ratio might be very small when
+  /// working with a PCH file.
+  SetOfDecls LocalTUDecls;
+
   // PD is owned by AnalysisManager.
   PathDiagnosticConsumer *PD;
 
@@ -214,11 +222,16 @@
                                   Opts.NoRetryExhausted));
   }
 
+  /// \brief Store the top level decls in the set to be processed later on.
+  /// (Doing this pre-processing avoids deserialization of data from PCH.)
+  virtual bool HandleTopLevelDecl(DeclGroupRef D);
+  virtual void HandleTopLevelDeclInObjCContainer(DeclGroupRef D);
+
   virtual void HandleTranslationUnit(ASTContext &C);
 
-  /// \brief Build the call graph for the TU and use it to define the order
-  /// in which the functions should be visited.
-  void HandleDeclsGallGraph(TranslationUnitDecl *TU);
+  /// \brief Build the call graph for all the top level decls of this TU and
+  /// use it to define the order in which the functions should be visited.
+  void HandleDeclsGallGraph();
 
   /// \brief Run analyzes(syntax or path sensitive) on the given function.
   /// \param Mode - determines if we are requesting syntax only or path
@@ -226,13 +239,12 @@
   /// \param VisitedCallees - The output parameter, which is populated with the
   /// set of functions which should be considered analyzed after analyzing the
   /// given root function.
-  void HandleCode(Decl *D, AnalysisMode Mode, SetOfDecls *VisitedCallees = 0);
+  void HandleCode(Decl *D, AnalysisMode Mode,
+                  SetOfConstDecls *VisitedCallees = 0);
 
-  /// \brief Check if we should skip (not analyze) the given function.
-  bool skipFunction(Decl *D);
-
-  void RunPathSensitiveChecks(Decl *D, SetOfDecls *VisitedCallees);
-  void ActionExprEngine(Decl *D, bool ObjCGCEnabled, SetOfDecls *VisitedCallees);
+  void RunPathSensitiveChecks(Decl *D, SetOfConstDecls *VisitedCallees);
+  void ActionExprEngine(Decl *D, bool ObjCGCEnabled,
+                        SetOfConstDecls *VisitedCallees);
 
   /// Visitors for the RecursiveASTVisitor.
 
@@ -262,6 +274,13 @@
       HandleCode(MD, RecVisitorMode);
     return true;
   }
+
+private:
+  void storeTopLevelDecls(DeclGroupRef DG);
+
+  /// \brief Check if we should skip (not analyze) the given function.
+  bool skipFunction(Decl *D);
+
 };
 } // end anonymous namespace
 
@@ -271,11 +290,35 @@
 //===----------------------------------------------------------------------===//
 llvm::Timer* AnalysisConsumer::TUTotalTimer = 0;
 
-void AnalysisConsumer::HandleDeclsGallGraph(TranslationUnitDecl *TU) {
+bool AnalysisConsumer::HandleTopLevelDecl(DeclGroupRef DG) {
+  storeTopLevelDecls(DG);
+  return true;
+}
+
+void AnalysisConsumer::HandleTopLevelDeclInObjCContainer(DeclGroupRef DG) {
+  storeTopLevelDecls(DG);
+}
+
+void AnalysisConsumer::storeTopLevelDecls(DeclGroupRef DG) {
+  for (DeclGroupRef::iterator I = DG.begin(), E = DG.end(); I != E; ++I) {
+
+    // Skip ObjCMethodDecl, wait for the objc container to avoid
+    // analyzing twice.
+    if (isa<ObjCMethodDecl>(*I))
+      continue;
+
+    LocalTUDecls.insert(*I);
+  }
+}
+
+void AnalysisConsumer::HandleDeclsGallGraph() {
   // Otherwise, use the Callgraph to derive the order.
   // Build the Call Graph.
   CallGraph CG;
-  CG.addToCallGraph(TU);
+  // Add all the top level declarations to the graph.
+  for (SetOfDecls::iterator I = LocalTUDecls.begin(),
+                            E = LocalTUDecls.end(); I != E; ++I)
+    CG.addToCallGraph(*I);
 
   // Find the top level nodes - children of root + the unreachable (parentless)
   // nodes.
@@ -316,15 +359,15 @@
       continue;
 
     // Analyze the function.
-    SetOfDecls VisitedCallees;
+    SetOfConstDecls VisitedCallees;
     Decl *D = N->getDecl();
     assert(D);
     HandleCode(D, ANALYSIS_PATH,
                (Mgr->InliningMode == All ? 0 : &VisitedCallees));
 
     // Add the visited callees to the global visited set.
-    for (SetOfDecls::const_iterator I = VisitedCallees.begin(),
-                                    E = VisitedCallees.end(); I != E; ++I) {
+    for (SetOfConstDecls::const_iterator I = VisitedCallees.begin(),
+                                         E = VisitedCallees.end(); I != E; ++I){
       CallGraphNode *VN = CG.getNode(*I);
       if (VN)
         Visited.insert(VN);
@@ -358,10 +401,14 @@
     // sensitive analyzes as well.
     RecVisitorMode = (Mgr->shouldInlineCall() ? ANALYSIS_SYNTAX : ANALYSIS_ALL);
     RecVisitorBR = &BR;
-    TraverseDecl(TU);
+
+    // Process all the top level declarations.
+    for (SetOfDecls::iterator I = LocalTUDecls.begin(),
+                              E = LocalTUDecls.end(); I != E; ++I)
+      TraverseDecl(*I);
 
     if (Mgr->shouldInlineCall())
-      HandleDeclsGallGraph(TU);
+      HandleDeclsGallGraph();
 
     // After all decls handled, run checkers on the entire TranslationUnit.
     checkerMgr->runCheckersOnEndOfTranslationUnit(TU, *Mgr, BR);
@@ -424,7 +471,7 @@
 }
 
 void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode,
-                                  SetOfDecls *VisitedCallees) {
+                                  SetOfConstDecls *VisitedCallees) {
   if (skipFunction(D))
     return;
 
@@ -458,7 +505,7 @@
 //===----------------------------------------------------------------------===//
 
 void AnalysisConsumer::ActionExprEngine(Decl *D, bool ObjCGCEnabled,
-                                        SetOfDecls *VisitedCallees) {
+                                        SetOfConstDecls *VisitedCallees) {
   // Construct the analysis engine.  First check if the CFG is valid.
   // FIXME: Inter-procedural analysis will need to handle invalid CFGs.
   if (!Mgr->getCFG(D))
@@ -489,7 +536,8 @@
   Eng.getBugReporter().FlushReports();
 }
 
-void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, SetOfDecls *Visited) {
+void AnalysisConsumer::RunPathSensitiveChecks(Decl *D,
+                                              SetOfConstDecls *Visited) {
 
   switch (Mgr->getLangOpts().getGC()) {
   case LangOptions::NonGC: