[analyzer] Improve CloneChecker diagnostics

Highlight code clones referenced by the warning message with the help of
the extra notes feature recently introduced in r283092.

Change warning text to more clang-ish. Remove suggestions from the copy-paste
error checker diagnostics, because currently our suggestions are strictly 50%
wrong (we do not know which of the two code clones contains the error), and
for that reason we should not sound as if we're actually suggesting this.
Hopefully a better solution would bring them back.

Make sure the suspicious clone pair structure always mentions
the correct variable for the second clone.

Differential Revision: https://reviews.llvm.org/D24916

llvm-svn: 283094
diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp
index a91ccaa..dba89ec 100644
--- a/clang/lib/Analysis/CloneDetection.cpp
+++ b/clang/lib/Analysis/CloneDetection.cpp
@@ -82,6 +82,10 @@
 
 SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
 
+SourceRange StmtSequence::getSourceRange() const {
+  return SourceRange(getStartLoc(), getEndLoc());
+}
+
 namespace {
 
 /// \brief Analyzes the pattern of the referenced variables in a statement.
@@ -91,11 +95,11 @@
   struct VariableOccurence {
     /// The index of the associated VarDecl in the Variables vector.
     size_t KindID;
-    /// The source range in the code where the variable was referenced.
-    SourceRange Range;
+    /// The statement in the code where the variable was referenced.
+    const Stmt *Mention;
 
-    VariableOccurence(size_t KindID, SourceRange Range)
-        : KindID(KindID), Range(Range) {}
+    VariableOccurence(size_t KindID, const Stmt *Mention)
+        : KindID(KindID), Mention(Mention) {}
   };
 
   /// All occurences of referenced variables in the order of appearance.
@@ -107,19 +111,19 @@
   /// \brief Adds a new variable referenced to this pattern.
   /// \param VarDecl The declaration of the variable that is referenced.
   /// \param Range The SourceRange where this variable is referenced.
-  void addVariableOccurence(const VarDecl *VarDecl, SourceRange Range) {
+  void addVariableOccurence(const VarDecl *VarDecl, const Stmt *Mention) {
     // First check if we already reference this variable
     for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
       if (Variables[KindIndex] == VarDecl) {
         // If yes, add a new occurence that points to the existing entry in
         // the Variables vector.
-        Occurences.emplace_back(KindIndex, Range);
+        Occurences.emplace_back(KindIndex, Mention);
         return;
       }
     }
     // If this variable wasn't already referenced, add it to the list of
     // referenced variables and add a occurence that points to this new entry.
-    Occurences.emplace_back(Variables.size(), Range);
+    Occurences.emplace_back(Variables.size(), Mention);
     Variables.push_back(VarDecl);
   }
 
@@ -134,7 +138,7 @@
     // Check if S is a reference to a variable. If yes, add it to the pattern.
     if (auto D = dyn_cast<DeclRefExpr>(S)) {
       if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
-        addVariableOccurence(VD, D->getSourceRange());
+        addVariableOccurence(VD, D);
     }
 
     // Recursively check all children of the given statement.
@@ -208,7 +212,7 @@
       // Store information about the first clone.
       FirstMismatch->FirstCloneInfo =
           CloneDetector::SuspiciousClonePair::SuspiciousCloneInfo(
-              Variables[ThisOccurence.KindID], ThisOccurence.Range,
+              Variables[ThisOccurence.KindID], ThisOccurence.Mention,
               FirstSuggestion);
 
       // Same as above but with the other clone. We do this for both clones as
@@ -221,7 +225,7 @@
       // Store information about the second clone.
       FirstMismatch->SecondCloneInfo =
           CloneDetector::SuspiciousClonePair::SuspiciousCloneInfo(
-              Variables[ThisOccurence.KindID], OtherOccurence.Range,
+              Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
               SecondSuggestion);
 
       // SuspiciousClonePair guarantees that the first clone always has a
diff --git a/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
index b4ac223..6fa5732 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -16,8 +16,10 @@
 #include "ClangSACheckers.h"
 #include "clang/Analysis/CloneDetection.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 
 using namespace clang;
@@ -27,6 +29,7 @@
 class CloneChecker
     : public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> {
   mutable CloneDetector Detector;
+  mutable std::unique_ptr<BugType> BT_Exact, BT_Suspicious;
 
 public:
   void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
@@ -36,12 +39,12 @@
                                  AnalysisManager &Mgr, BugReporter &BR) const;
 
   /// \brief Reports all clones to the user.
-  void reportClones(SourceManager &SM, AnalysisManager &Mgr,
+  void reportClones(BugReporter &BR, AnalysisManager &Mgr,
                     int MinComplexity) const;
 
   /// \brief Reports only suspicious clones to the user along with informaton
   ///        that explain why they are suspicious.
-  void reportSuspiciousClones(SourceManager &SM, AnalysisManager &Mgr,
+  void reportSuspiciousClones(BugReporter &BR, AnalysisManager &Mgr,
                               int MinComplexity) const;
 };
 } // end anonymous namespace
@@ -70,79 +73,82 @@
       "ReportNormalClones", true, this);
 
   if (ReportSuspiciousClones)
-    reportSuspiciousClones(BR.getSourceManager(), Mgr, MinComplexity);
+    reportSuspiciousClones(BR, Mgr, MinComplexity);
 
   if (ReportNormalClones)
-    reportClones(BR.getSourceManager(), Mgr, MinComplexity);
+    reportClones(BR, Mgr, MinComplexity);
 }
 
-void CloneChecker::reportClones(SourceManager &SM, AnalysisManager &Mgr,
+static PathDiagnosticLocation makeLocation(const StmtSequence &S,
+                                           AnalysisManager &Mgr) {
+  ASTContext &ACtx = Mgr.getASTContext();
+  return PathDiagnosticLocation::createBegin(
+      S.front(), ACtx.getSourceManager(),
+      Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl()));
+}
+
+void CloneChecker::reportClones(BugReporter &BR, AnalysisManager &Mgr,
                                 int MinComplexity) const {
 
   std::vector<CloneDetector::CloneGroup> CloneGroups;
   Detector.findClones(CloneGroups, MinComplexity);
 
-  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
-
-  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
-                                               "Detected code clone.");
-
-  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
-                                               "Related code clone is here.");
+  if (!BT_Exact)
+    BT_Exact.reset(new BugType(this, "Exact code clone", "Code clone"));
 
   for (CloneDetector::CloneGroup &Group : CloneGroups) {
     // We group the clones by printing the first as a warning and all others
     // as a note.
-    DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
-    for (unsigned i = 1; i < Group.Sequences.size(); ++i) {
-      DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID);
-    }
+    auto R = llvm::make_unique<BugReport>(
+        *BT_Exact, "Duplicate code detected",
+        makeLocation(Group.Sequences.front(), Mgr));
+    R->addRange(Group.Sequences.front().getSourceRange());
+
+    for (unsigned i = 1; i < Group.Sequences.size(); ++i)
+      R->addNote("Similar code here",
+                      makeLocation(Group.Sequences[i], Mgr),
+                      Group.Sequences[i].getSourceRange());
+    BR.emitReport(std::move(R));
   }
 }
 
-void CloneChecker::reportSuspiciousClones(SourceManager &SM,
+void CloneChecker::reportSuspiciousClones(BugReporter &BR,
                                           AnalysisManager &Mgr,
                                           int MinComplexity) const {
 
   std::vector<CloneDetector::SuspiciousClonePair> Clones;
   Detector.findSuspiciousClones(Clones, MinComplexity);
 
-  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+  if (!BT_Suspicious)
+    BT_Suspicious.reset(
+        new BugType(this, "Suspicious code clone", "Code clone"));
 
-  auto SuspiciousCloneWarning = DiagEngine.getCustomDiagID(
-      DiagnosticsEngine::Warning, "suspicious code clone detected; did you "
-                                  "mean to use %0?");
-
-  auto RelatedCloneNote = DiagEngine.getCustomDiagID(
-      DiagnosticsEngine::Note, "suggestion is based on the usage of this "
-                               "variable in a similar piece of code");
-
-  auto RelatedSuspiciousCloneNote = DiagEngine.getCustomDiagID(
-      DiagnosticsEngine::Note, "suggestion is based on the usage of this "
-                               "variable in a similar piece of code; did you "
-                               "mean to use %0?");
+  ASTContext &ACtx = BR.getContext();
+  SourceManager &SM = ACtx.getSourceManager();
+  AnalysisDeclContext *ADC =
+      Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl());
 
   for (CloneDetector::SuspiciousClonePair &Pair : Clones) {
-    // The first clone always has a suggestion and we report it to the user
-    // along with the place where the suggestion should be used.
-    DiagEngine.Report(Pair.FirstCloneInfo.VarRange.getBegin(),
-                      SuspiciousCloneWarning)
-        << Pair.FirstCloneInfo.VarRange << Pair.FirstCloneInfo.Suggestion;
+    // FIXME: We are ignoring the suggestions currently, because they are
+    // only 50% accurate (even if the second suggestion is unavailable),
+    // which may confuse the user.
+    // Think how to perform more accurate suggestions?
 
-    // The second clone can have a suggestion and if there is one, we report
-    // that suggestion to the user.
-    if (Pair.SecondCloneInfo.Suggestion) {
-      DiagEngine.Report(Pair.SecondCloneInfo.VarRange.getBegin(),
-                        RelatedSuspiciousCloneNote)
-          << Pair.SecondCloneInfo.VarRange << Pair.SecondCloneInfo.Suggestion;
-      continue;
-    }
+    auto R = llvm::make_unique<BugReport>(
+        *BT_Suspicious,
+        "Potential copy-paste error; did you really mean to use '" +
+            Pair.FirstCloneInfo.Variable->getNameAsString() + "' here?",
+        PathDiagnosticLocation::createBegin(Pair.FirstCloneInfo.Mention, SM,
+                                            ADC));
+    R->addRange(Pair.FirstCloneInfo.Mention->getSourceRange());
 
-    // If there isn't a suggestion in the second clone, we only inform the
-    // user where we got the idea that his code could contain an error.
-    DiagEngine.Report(Pair.SecondCloneInfo.VarRange.getBegin(),
-                      RelatedCloneNote)
-        << Pair.SecondCloneInfo.VarRange;
+    R->addNote("Similar code using '" +
+                   Pair.SecondCloneInfo.Variable->getNameAsString() + "' here",
+               PathDiagnosticLocation::createBegin(Pair.SecondCloneInfo.Mention,
+                                                   SM, ADC),
+               Pair.SecondCloneInfo.Mention->getSourceRange());
+
+    BR.emitReport(std::move(R));
   }
 }