[analyzer] Bug identification
This patch adds hashes to the plist and html output to be able to identfy bugs
for suppressing false positives or diff results against a baseline. This hash
aims to be resilient for code evolution and is usable to identify bugs in two
different snapshots of the same software. One missing piece however is a
permanent unique identifier of the checker that produces the warning. Once that
issue is resolved, the hashes generated are going to change. Until that point
this feature is marked experimental, but it is suitable for early adoption.
Differential Revision: http://reviews.llvm.org/D10305
Original patch by: Bence Babati!
llvm-svn: 251011
diff --git a/clang/lib/StaticAnalyzer/Checkers/Checkers.td b/clang/lib/StaticAnalyzer/Checkers/Checkers.td
index 0843cb4..03d09d3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/lib/StaticAnalyzer/Checkers/Checkers.td
@@ -591,4 +591,8 @@
HelpText<"View Exploded Graphs using GraphViz">,
DescFile<"DebugCheckers.cpp">;
+def BugHashDumper : Checker<"DumpBugHash">,
+ HelpText<"Dump the bug hash for all statements.">,
+ DescFile<"DebugCheckers.cpp">;
+
} // end "debug"
diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
index 51e7a3d..acf0004 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
@@ -16,7 +16,10 @@
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/Analysis/CallGraph.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/IssueHash.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "llvm/Support/Process.h"
@@ -209,3 +212,35 @@
void ento::registerExplodedGraphViewer(CheckerManager &mgr) {
mgr.registerChecker<ExplodedGraphViewer>();
}
+
+//===----------------------------------------------------------------------===//
+// DumpBugHash
+//===----------------------------------------------------------------------===//
+
+namespace {
+class BugHashDumper : public Checker<check::PostStmt<Stmt>> {
+public:
+ mutable std::unique_ptr<BugType> BT;
+
+ void checkPostStmt(const Stmt *S, CheckerContext &C) const {
+ if (!BT)
+ BT.reset(new BugType(this, "Dump hash components", "debug"));
+
+ ExplodedNode *N = C.generateNonFatalErrorNode();
+ if (!N)
+ return;
+
+ const SourceManager &SM = C.getSourceManager();
+ FullSourceLoc FL(S->getLocStart(), SM);
+ std::string HashContent =
+ GetIssueString(SM, FL, getCheckName().getName(), BT->getCategory(),
+ C.getLocationContext()->getDecl());
+
+ C.emitReport(llvm::make_unique<BugReport>(*BT, HashContent, N));
+ }
+};
+}
+
+void ento::registerBugHashDumper(CheckerManager &mgr) {
+ mgr.registerChecker<BugHashDumper>();
+}
diff --git a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt
index f0db381..4499323 100644
--- a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt
+++ b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt
@@ -6,6 +6,7 @@
AnalyzerOptions.cpp
BasicValueFactory.cpp
BlockCounter.cpp
+ IssueHash.cpp
BugReporter.cpp
BugReporterVisitors.cpp
CallEvent.cpp
diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index e7a11ac..86eab41 100644
--- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/FileManager.h"
@@ -22,6 +21,8 @@
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/IssueHash.h"
+#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -236,6 +237,13 @@
if (!BugType.empty())
os << "\n<!-- BUGTYPE " << BugType << " -->\n";
+ PathDiagnosticLocation UPDLoc = D.getUniqueingLoc();
+ FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid()
+ ? UPDLoc.asLocation()
+ : D.getLocation().asLocation()),
+ SMgr);
+ const Decl *DeclWithIssue = D.getDeclWithIssue();
+
StringRef BugCategory = D.getCategory();
if (!BugCategory.empty())
os << "\n<!-- BUGCATEGORY " << BugCategory << " -->\n";
@@ -246,6 +254,10 @@
os << "\n<!-- FUNCTIONNAME " << declName << " -->\n";
+ os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT "
+ << GetIssueHash(SMgr, L, D.getCheckName(), D.getBugType(), DeclWithIssue)
+ << " -->\n";
+
os << "\n<!-- BUGLINE "
<< LineNumber
<< " -->\n";
diff --git a/clang/lib/StaticAnalyzer/Core/IssueHash.cpp b/clang/lib/StaticAnalyzer/Core/IssueHash.cpp
new file mode 100644
index 0000000..bfb7486
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Core/IssueHash.cpp
@@ -0,0 +1,193 @@
+//===---------- IssueHash.cpp - Generate identification hashes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/StaticAnalyzer/Core/IssueHash.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
+
+#include <functional>
+#include <sstream>
+#include <string>
+
+using namespace clang;
+
+// Get a string representation of the parts of the signature that can be
+// overloaded on.
+static std::string GetSignature(const FunctionDecl *Target) {
+ if (!Target)
+ return "";
+ std::string Signature;
+
+ if (!isa<CXXConstructorDecl>(Target) && !isa<CXXDestructorDecl>(Target) &&
+ !isa<CXXConversionDecl>(Target))
+ Signature.append(Target->getReturnType().getAsString()).append(" ");
+ Signature.append(Target->getQualifiedNameAsString()).append("(");
+
+ for (int i = 0, paramsCount = Target->getNumParams(); i < paramsCount; ++i) {
+ if (i)
+ Signature.append(", ");
+ Signature.append(Target->getParamDecl(i)->getType().getAsString());
+ }
+
+ if (Target->isVariadic())
+ Signature.append(", ...");
+ Signature.append(")");
+
+ const auto *TargetT =
+ llvm::dyn_cast_or_null<FunctionType>(Target->getType().getTypePtr());
+
+ if (!TargetT)
+ return Signature;
+
+ if (TargetT->isConst())
+ Signature.append(" const");
+ if (TargetT->isVolatile())
+ Signature.append(" volatile");
+ if (TargetT->isRestrict())
+ Signature.append(" restrict");
+
+ if (const auto *TargetPT =
+ dyn_cast_or_null<FunctionProtoType>(Target->getType().getTypePtr())) {
+ switch (TargetPT->getRefQualifier()) {
+ case RQ_LValue:
+ Signature.append(" &");
+ break;
+ case RQ_RValue:
+ Signature.append(" &&");
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Signature;
+}
+
+static std::string GetEnclosingDeclContextSignature(const Decl *D) {
+ if (!D)
+ return "";
+
+ if (const auto *ND = dyn_cast<NamedDecl>(D)) {
+ std::string DeclName;
+
+ switch (ND->getKind()) {
+ case Decl::Namespace:
+ case Decl::Record:
+ case Decl::CXXRecord:
+ case Decl::Enum:
+ DeclName = ND->getQualifiedNameAsString();
+ break;
+ case Decl::CXXConstructor:
+ case Decl::CXXDestructor:
+ case Decl::CXXConversion:
+ case Decl::CXXMethod:
+ case Decl::Function:
+ DeclName = GetSignature(dyn_cast_or_null<FunctionDecl>(ND));
+ break;
+ case Decl::ObjCMethod:
+ // ObjC Methods can not be overloaded, qualified name uniquely identifies
+ // the method.
+ DeclName = ND->getQualifiedNameAsString();
+ break;
+ default:
+ break;
+ }
+
+ return DeclName;
+ }
+
+ return "";
+}
+
+static StringRef GetNthLineOfFile(llvm::MemoryBuffer *Buffer, int Line) {
+ if (!Buffer)
+ return "";
+
+ llvm::line_iterator LI(*Buffer, false);
+ for (; !LI.is_at_eof() && LI.line_number() != Line; ++LI)
+ ;
+
+ return *LI;
+}
+
+static std::string NormalizeLine(const SourceManager &SM, FullSourceLoc &L,
+ const Decl *D) {
+ static StringRef Whitespaces = " \t\n";
+
+ const LangOptions &Opts = D->getASTContext().getLangOpts();
+ StringRef Str = GetNthLineOfFile(SM.getBuffer(L.getFileID(), L),
+ L.getExpansionLineNumber());
+ unsigned col = Str.find_first_not_of(Whitespaces);
+
+ SourceLocation StartOfLine =
+ SM.translateLineCol(SM.getFileID(L), L.getExpansionLineNumber(), col);
+ llvm::MemoryBuffer *Buffer =
+ SM.getBuffer(SM.getFileID(StartOfLine), StartOfLine);
+ if (!Buffer)
+ return {};
+
+ const char *BufferPos = SM.getCharacterData(StartOfLine);
+
+ Token Token;
+ Lexer Lexer(SM.getLocForStartOfFile(SM.getFileID(StartOfLine)), Opts,
+ Buffer->getBufferStart(), BufferPos, Buffer->getBufferEnd());
+
+ size_t NextStart = 0;
+ std::ostringstream LineBuff;
+ while (!Lexer.LexFromRawLexer(Token) && NextStart < 2) {
+ if (Token.isAtStartOfLine() && NextStart++ > 0)
+ continue;
+ LineBuff << std::string(SM.getCharacterData(Token.getLocation()),
+ Token.getLength());
+ }
+
+ return LineBuff.str();
+}
+
+static llvm::SmallString<32> GetHashOfContent(StringRef Content) {
+ llvm::MD5 Hash;
+ llvm::MD5::MD5Result MD5Res;
+ SmallString<32> Res;
+
+ Hash.update(Content);
+ Hash.final(MD5Res);
+ llvm::MD5::stringifyResult(MD5Res, Res);
+
+ return Res;
+}
+
+std::string clang::GetIssueString(const SourceManager &SM,
+ FullSourceLoc &IssueLoc,
+ StringRef CheckerName, StringRef BugType,
+ const Decl *D) {
+ static StringRef Delimiter = "$";
+
+ return (llvm::Twine(CheckerName) + Delimiter +
+ GetEnclosingDeclContextSignature(D) + Delimiter +
+ std::to_string(IssueLoc.getExpansionColumnNumber()) + Delimiter +
+ NormalizeLine(SM, IssueLoc, D) + Delimiter + BugType)
+ .str();
+}
+
+SmallString<32> clang::GetIssueHash(const SourceManager &SM,
+ FullSourceLoc &IssueLoc,
+ StringRef CheckerName, StringRef BugType,
+ const Decl *D) {
+ return GetHashOfContent(
+ GetIssueString(SM, IssueLoc, CheckerName, BugType, D));
+}
diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
index 59ed921..2bf4394 100644
--- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/PlistSupport.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
+#include "clang/StaticAnalyzer/Core/IssueHash.h"
#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -390,6 +390,18 @@
o << " <key>check_name</key>";
EmitString(o, D->getCheckName()) << '\n';
+ o << " <!-- This hash is experimental and going to change! -->\n";
+ o << " <key>issue_hash_content_of_line_in_context</key>";
+ PathDiagnosticLocation UPDLoc = D->getUniqueingLoc();
+ FullSourceLoc L(SM->getExpansionLoc(UPDLoc.isValid()
+ ? UPDLoc.asLocation()
+ : D->getLocation().asLocation()),
+ *SM);
+ const Decl *DeclWithIssue = D->getDeclWithIssue();
+ EmitString(o, GetIssueHash(*SM, L, D->getCheckName(), D->getBugType(),
+ DeclWithIssue))
+ << '\n';
+
// Output information about the semantic context where
// the issue occurred.
if (const Decl *DeclWithIssue = D->getDeclWithIssue()) {
@@ -429,22 +441,17 @@
// will have different issue_hashes and that the hash will identify
// the leak location even after code is added between the allocation
// site and the end of scope (leak report location).
- PathDiagnosticLocation UPDLoc = D->getUniqueingLoc();
if (UPDLoc.isValid()) {
- FullSourceLoc UL(SM->getExpansionLoc(UPDLoc.asLocation()),
- *SM);
FullSourceLoc UFunL(SM->getExpansionLoc(
D->getUniqueingDecl()->getBody()->getLocStart()), *SM);
- o << " <key>issue_hash</key><string>"
- << UL.getExpansionLineNumber() - UFunL.getExpansionLineNumber()
+ o << " <key>issue_hash_function_offset</key><string>"
+ << L.getExpansionLineNumber() - UFunL.getExpansionLineNumber()
<< "</string>\n";
// Otherwise, use the location on which the bug is reported.
} else {
- FullSourceLoc L(SM->getExpansionLoc(D->getLocation().asLocation()),
- *SM);
FullSourceLoc FunL(SM->getExpansionLoc(Body->getLocStart()), *SM);
- o << " <key>issue_hash</key><string>"
+ o << " <key>issue_hash_function_offset</key><string>"
<< L.getExpansionLineNumber() - FunL.getExpansionLineNumber()
<< "</string>\n";
}