Introduce caching of diagnostics in BugReporter.  This provides extra
pruning of diagnostics that may be emitted multiple times.  This is
accomplished by adding FoldingSet profiling support to PathDiagnostic,
and then having BugReporter record what diagnostics have been issued.

This was motived to a serious bug introduced by moving the
'divide-by-zero' checking outside of GRExprEngine into a separate
'Checker' class.  When analyzing code using the '-fobjc-gc' option, a
given function would be analyzed twice, but the second time various
"internal checks" would be disabled to avoid emitting multiple
diagnostics (e.g., "null dereference") for the same issue.  The
problem is that such checks also effect path pruning and don't just
emit diagnostics.  This resulted in an assertion failure involving a
real divide-by-zero in some analyzed code where we would get an
assertion failure in APInt because the 'DivZero' check was disabled
and didn't prune the logic that resulted in the divide-by-zero in the
analyzer.

The implemented solution is somewhat of a hack, and may not perform
extremely well.  This will need to be cleaned up over time.

As a regression test, 'misc-ps.m' has been modified so that its tests
are run using -fobjc-gc to test this diagnostic pruning behavior.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@82198 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Analysis/PathDiagnostic.h b/include/clang/Analysis/PathDiagnostic.h
index 809c831..0635a01 100644
--- a/include/clang/Analysis/PathDiagnostic.h
+++ b/include/clang/Analysis/PathDiagnostic.h
@@ -17,6 +17,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Diagnostic.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/FoldingSet.h"
 
 #include <vector>
 #include <deque>
@@ -24,12 +25,17 @@
 #include <algorithm>
 
 namespace clang {
-
+    
+class Stmt;
+class Decl;
+class Preprocessor;
+  
 //===----------------------------------------------------------------------===//
 // High-level interface for handlers of path-sensitive diagnostics.
 //===----------------------------------------------------------------------===//
 
 class PathDiagnostic;
+    
 class Stmt;
 class Decl;
 class Preprocessor;
@@ -38,12 +44,9 @@
 public:
   PathDiagnosticClient() {}
   virtual ~PathDiagnosticClient() {}
-
   virtual void SetPreprocessor(Preprocessor *PP) {}
-
   virtual void HandleDiagnostic(Diagnostic::Level DiagLevel,
                                 const DiagnosticInfo &Info);
-
   virtual void HandlePathDiagnostic(const PathDiagnostic* D) = 0;
 
   enum PathGenerationScheme { Minimal, Extensive };
@@ -125,6 +128,8 @@
   void flatten();
 
   const SourceManager& getManager() const { assert(isValid()); return *SM; }
+  
+  void Profile(llvm::FoldingSetNodeID &ID) const;
 };
 
 class PathDiagnosticLocationPair {
@@ -142,6 +147,11 @@
     Start.flatten();
     End.flatten();
   }
+  
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    Start.Profile(ID);
+    End.Profile(ID);
+  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -220,6 +230,8 @@
   static inline bool classof(const PathDiagnosticPiece* P) {
     return true;
   }
+  
+  virtual void Profile(llvm::FoldingSetNodeID &ID) const;
 };
 
 class PathDiagnosticSpotPiece : public PathDiagnosticPiece {
@@ -238,6 +250,8 @@
 
   PathDiagnosticLocation getLocation() const { return Pos; }
   virtual void flattenLocations() { Pos.flatten(); }
+  
+  virtual void Profile(llvm::FoldingSetNodeID &ID) const;
 };
 
 class PathDiagnosticEventPiece : public PathDiagnosticSpotPiece {
@@ -317,6 +331,8 @@
   static inline bool classof(const PathDiagnosticPiece* P) {
     return P->getKind() == ControlFlow;
   }
+  
+  virtual void Profile(llvm::FoldingSetNodeID &ID) const;
 };
 
 class PathDiagnosticMacroPiece : public PathDiagnosticSpotPiece {
@@ -347,12 +363,14 @@
   static inline bool classof(const PathDiagnosticPiece* P) {
     return P->getKind() == Macro;
   }
+  
+  virtual void Profile(llvm::FoldingSetNodeID &ID) const;
 };
 
 /// PathDiagnostic - PathDiagnostic objects represent a single path-sensitive
 ///  diagnostic.  It represents an ordered-collection of PathDiagnosticPieces,
 ///  each which represent the pieces of the path.
-class PathDiagnostic {
+class PathDiagnostic : public llvm::FoldingSetNode {
   std::deque<PathDiagnosticPiece*> path;
   unsigned Size;
   std::string BugType;
@@ -386,11 +404,13 @@
   }
 
   void push_front(PathDiagnosticPiece* piece) {
+    assert(piece);
     path.push_front(piece);
     ++Size;
   }
 
   void push_back(PathDiagnosticPiece* piece) {
+    assert(piece);
     path.push_back(piece);
     ++Size;
   }
@@ -453,7 +473,7 @@
     bool operator==(const const_iterator& X) const { return I == X.I; }
     bool operator!=(const const_iterator& X) const { return I != X.I; }
 
-    reference operator*() const { return **I; }
+    reference operator*() const { assert(*I); return **I; }
     pointer operator->() const { return *I; }
 
     const_iterator& operator++() { ++I; return *this; }
@@ -480,8 +500,8 @@
   void flattenLocations() {
     for (iterator I = begin(), E = end(); I != E; ++I) I->flattenLocations();
   }
-};
-
-
+  
+  void Profile(llvm::FoldingSetNodeID &ID) const;
+};  
 } //end clang namespace
 #endif
diff --git a/lib/Analysis/BugReporter.cpp b/lib/Analysis/BugReporter.cpp
index 38e9828..064fff4 100644
--- a/lib/Analysis/BugReporter.cpp
+++ b/lib/Analysis/BugReporter.cpp
@@ -1732,6 +1732,50 @@
   return NULL;
 }
 
+
+//===----------------------------------------------------------------------===//
+// DiagnosticCache.  This is a hack to cache analyzer diagnostics.  It
+// uses global state, which eventually should go elsewhere.
+//===----------------------------------------------------------------------===//
+namespace {
+class VISIBILITY_HIDDEN DiagCacheItem : public llvm::FoldingSetNode {
+  llvm::FoldingSetNodeID ID;
+public:
+  DiagCacheItem(BugReport *R, PathDiagnostic *PD) {
+    ID.AddString(R->getBugType().getName());
+    ID.AddString(R->getBugType().getCategory());
+    ID.AddString(R->getDescription());
+    ID.AddInteger(R->getLocation().getRawEncoding());
+    PD->Profile(ID);    
+  }
+  
+  void Profile(llvm::FoldingSetNodeID &id) {
+    id = ID;
+  }
+  
+  llvm::FoldingSetNodeID &getID() { return ID; }
+};
+}
+
+static bool IsCachedDiagnostic(BugReport *R, PathDiagnostic *PD) {
+  // FIXME: Eventually this diagnostic cache should reside in something
+  // like AnalysisManager instead of being a static variable.  This is
+  // really unsafe in the long term.
+  typedef llvm::FoldingSet<DiagCacheItem> DiagnosticCache;
+  static DiagnosticCache DC;
+  
+  void *InsertPos;
+  DiagCacheItem *Item = new DiagCacheItem(R, PD);
+  
+  if (DC.FindNodeOrInsertPos(Item->getID(), InsertPos)) {
+    delete Item;
+    return true;
+  }
+  
+  DC.InsertNode(Item, InsertPos);
+  return false;
+}
+
 void BugReporter::FlushReport(BugReportEquivClass& EQ) {
   BugReport *R = FindReportInEquivalenceClass(EQ);
 
@@ -1752,6 +1796,9 @@
 
   GeneratePathDiagnostic(*D.get(), EQ);
 
+  if (IsCachedDiagnostic(R, D.get()))
+    return;
+  
   // Get the meta data.
   std::pair<const char**, const char**> Meta = R->getExtraDescriptiveText();
   for (const char** s = Meta.first; s != Meta.second; ++s)
diff --git a/lib/Analysis/PathDiagnostic.cpp b/lib/Analysis/PathDiagnostic.cpp
index ceead6a..800496a 100644
--- a/lib/Analysis/PathDiagnostic.cpp
+++ b/lib/Analysis/PathDiagnostic.cpp
@@ -239,4 +239,66 @@
   }
 }
 
+//===----------------------------------------------------------------------===//
+// FoldingSet profiling methods.
+//===----------------------------------------------------------------------===//
 
+void PathDiagnosticLocation::Profile(llvm::FoldingSetNodeID &ID) const {
+  ID.AddInteger((unsigned) K);
+  switch (K) {
+    case RangeK:
+      ID.AddInteger(R.getBegin().getRawEncoding());
+      ID.AddInteger(R.getEnd().getRawEncoding());
+      break;      
+    case SingleLocK:
+      ID.AddInteger(R.getBegin().getRawEncoding());
+      break;
+    case StmtK:
+      ID.Add(S);
+      break;
+    case DeclK:
+      ID.Add(D);
+      break;
+  }
+  return;
+}
+
+void PathDiagnosticPiece::Profile(llvm::FoldingSetNodeID &ID) const {
+  ID.AddInteger((unsigned) getKind());
+  ID.AddString(str);
+  // FIXME: Add profiling support for code hints.
+  ID.AddInteger((unsigned) getDisplayHint());
+  for (range_iterator I = ranges_begin(), E = ranges_end(); I != E; ++I) {
+    ID.AddInteger(I->getBegin().getRawEncoding());
+    ID.AddInteger(I->getEnd().getRawEncoding());
+  }  
+}
+
+void PathDiagnosticSpotPiece::Profile(llvm::FoldingSetNodeID &ID) const {
+  PathDiagnosticPiece::Profile(ID);
+  ID.Add(Pos);
+}
+
+void PathDiagnosticControlFlowPiece::Profile(llvm::FoldingSetNodeID &ID) const {
+  PathDiagnosticPiece::Profile(ID);
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    ID.Add(*I);
+}
+
+void PathDiagnosticMacroPiece::Profile(llvm::FoldingSetNodeID &ID) const {
+  PathDiagnosticSpotPiece::Profile(ID);
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    ID.Add(**I);
+}
+
+void PathDiagnostic::Profile(llvm::FoldingSetNodeID &ID) const {
+  ID.AddInteger(Size);
+  ID.AddString(BugType);
+  ID.AddString(Desc);
+  ID.AddString(Category);
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    ID.Add(*I);
+  
+  for (meta_iterator I = meta_begin(), E = meta_end(); I != E; ++I)
+    ID.AddString(*I);
+}
diff --git a/lib/Frontend/AnalysisConsumer.cpp b/lib/Frontend/AnalysisConsumer.cpp
index 034ff93..0e25de4 100644
--- a/lib/Frontend/AnalysisConsumer.cpp
+++ b/lib/Frontend/AnalysisConsumer.cpp
@@ -293,8 +293,7 @@
 
 
 static void ActionGRExprEngine(AnalysisManager& mgr, Decl *D, 
-                               GRTransferFuncs* tf,
-                               bool StandardWarnings = true) {
+                               GRTransferFuncs* tf) {
 
 
   llvm::OwningPtr<GRTransferFuncs> TF(tf);
@@ -303,17 +302,13 @@
   mgr.DisplayFunction(D);
 
   // Construct the analysis engine.
-  LiveVariables* L = mgr.getLiveVariables(D);
-  if (!L) return;
-
   GRExprEngine Eng(mgr);
 
   Eng.setTransferFunctions(tf);
+  Eng.RegisterInternalChecks(); // FIXME: Internal checks should just
+                                // automatically register.
+  RegisterAppleChecks(Eng, *D);
 
-  if (StandardWarnings) {
-    Eng.RegisterInternalChecks();
-    RegisterAppleChecks(Eng, *D);
-  }
 
   // Set the graph auditor.
   llvm::OwningPtr<ExplodedNode::Auditor> Auditor;
@@ -338,13 +333,13 @@
 }
 
 static void ActionCheckerCFRefAux(AnalysisManager& mgr, Decl *D,
-                                  bool GCEnabled, bool StandardWarnings) {
+                                  bool GCEnabled) {
 
   GRTransferFuncs* TF = MakeCFRefCountTF(mgr.getASTContext(),
                                          GCEnabled,
                                          mgr.getLangOptions());
 
-  ActionGRExprEngine(mgr, D, TF, StandardWarnings);
+  ActionGRExprEngine(mgr, D, TF);
 }
 
 static void ActionCheckerCFRef(AnalysisManager& mgr, Decl *D) {
@@ -353,16 +348,16 @@
    default:
      assert (false && "Invalid GC mode.");
    case LangOptions::NonGC:
-     ActionCheckerCFRefAux(mgr, D, false, true);
+     ActionCheckerCFRefAux(mgr, D, false);
      break;
 
    case LangOptions::GCOnly:
-     ActionCheckerCFRefAux(mgr, D, true, true);
+     ActionCheckerCFRefAux(mgr, D, true);
      break;
 
    case LangOptions::HybridGC:
-     ActionCheckerCFRefAux(mgr, D, false, true);
-     ActionCheckerCFRefAux(mgr, D, true, false);
+     ActionCheckerCFRefAux(mgr, D, false);
+     ActionCheckerCFRefAux(mgr, D, true);
      break;
  }
 }
diff --git a/test/Analysis/misc-ps.m b/test/Analysis/misc-ps.m
index 189fc44..d7c3db7 100644
--- a/test/Analysis/misc-ps.m
+++ b/test/Analysis/misc-ps.m
@@ -1,4 +1,5 @@
-// RUN: clang-cc -analyze -checker-cfref --analyzer-store=basic -analyzer-constraints=basic --verify -fblocks %s &&
+// NOTE: Use '-fobjc-gc' to test the analysis being run twice, and multiple reports are not issued.
+// RUN: clang-cc -analyze -checker-cfref --analyzer-store=basic -fobjc-gc -analyzer-constraints=basic --verify -fblocks %s &&
 // RUN: clang-cc -analyze -checker-cfref --analyzer-store=basic -analyzer-constraints=range --verify -fblocks %s &&
 // RUN: clang-cc -analyze -checker-cfref --analyzer-store=region -analyzer-constraints=basic --verify -fblocks %s &&
 // RUN: clang-cc -analyze -checker-cfref --analyzer-store=region -analyzer-constraints=range --verify -fblocks %s