[analyzer] Make StmtDataCollector customizable
Summary:
This moves the data collection macro calls for Stmt nodes
to lib/AST/StmtDataCollectors.inc.
Users can subclass ConstStmtVisitor, define the DEF_ADD_DATA macro, and
include StmtDataCollectors.inc to define visitor methods for each Stmt
subclass. This also makes it possible to customize the visit methods, as
exemplified in lib/Analysis/CloneDetection.cpp.
Move helper methods for data collection to a new module,
AST/DataCollection.
Add data collection for DeclRefExpr, MemberExpr and some literals.
Reviewers: arphaman, teemperor!
Subscribers: mgorny, xazax.hun, cfe-commits
Differential Revision: https://reviews.llvm.org/D36664
llvm-svn: 311569
diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp
index 5ea7498..1cce6a4 100644
--- a/clang/lib/Analysis/CloneDetection.cpp
+++ b/clang/lib/Analysis/CloneDetection.cpp
@@ -13,16 +13,12 @@
#include "clang/Analysis/CloneDetection.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/RecursiveASTVisitor.h"
-#include "clang/AST/Stmt.h"
-#include "clang/Lex/Lexer.h"
+#include "clang/AST/DataCollection.h"
+#include "clang/AST/DeclTemplate.h"
#include "llvm/Support/MD5.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Path.h"
using namespace clang;
-using namespace clang::clone_detection;
StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D,
unsigned StartIndex, unsigned EndIndex)
@@ -91,34 +87,6 @@
return SourceRange(getStartLoc(), getEndLoc());
}
-/// Prints the macro name that contains the given SourceLocation into the given
-/// raw_string_ostream.
-static void printMacroName(llvm::raw_string_ostream &MacroStack,
- ASTContext &Context, SourceLocation Loc) {
- MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
- Context.getLangOpts());
-
- // Add an empty space at the end as a padding to prevent
- // that macro names concatenate to the names of other macros.
- MacroStack << " ";
-}
-
-std::string clone_detection::getMacroStack(SourceLocation Loc,
- ASTContext &Context) {
- std::string MacroStack;
- llvm::raw_string_ostream MacroStackStream(MacroStack);
- SourceManager &SM = Context.getSourceManager();
-
- // Iterate over all macros that expanded into the given SourceLocation.
- while (Loc.isMacroID()) {
- // Add the macro name to the stream.
- printMacroName(MacroStackStream, Context, Loc);
- Loc = SM.getImmediateMacroCallerLoc(Loc);
- }
- MacroStackStream.flush();
- return MacroStack;
-}
-
void CloneDetector::analyzeCodeBody(const Decl *D) {
assert(D);
assert(D->hasBody());
@@ -184,16 +152,17 @@
}
}
-bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) {
+bool FilenamePatternConstraint::isAutoGenerated(
+ const CloneDetector::CloneGroup &Group) {
std::string Error;
- if (IgnoredFilesPattern.empty() || Group.empty() ||
+ if (IgnoredFilesPattern.empty() || Group.empty() ||
!IgnoredFilesRegex->isValid(Error))
return false;
for (const StmtSequence &S : Group) {
const SourceManager &SM = S.getASTContext().getSourceManager();
- StringRef Filename = llvm::sys::path::filename(SM.getFilename(
- S.getContainingDecl()->getLocation()));
+ StringRef Filename = llvm::sys::path::filename(
+ SM.getFilename(S.getContainingDecl()->getLocation()));
if (IgnoredFilesRegex->match(Filename))
return true;
}
@@ -201,6 +170,59 @@
return false;
}
+/// This class defines what a type II code clone is: if it collects the same
+/// data for two statements, then those two statements are considered to be
+/// clones of each other.
+///
+/// All collected data is forwarded to the given data consumer of type T.
+/// The data consumer class needs to provide a member method with the signature:
+/// update(StringRef Str)
+namespace {
+template <class T>
+class CloneTypeIIStmtDataCollector
+ : public ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>> {
+ ASTContext &Context;
+ /// The data sink to which all data is forwarded.
+ T &DataConsumer;
+
+ template <class Ty> void addData(const Ty &Data) {
+ data_collection::addDataToConsumer(DataConsumer, Data);
+ }
+
+public:
+ CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context,
+ T &DataConsumer)
+ : Context(Context), DataConsumer(DataConsumer) {
+ this->Visit(S);
+ }
+
+// Define a visit method for each class that collects its data and then visits
+// all parent classes. The generated methods are templates so that the custom
+// non-template visit methods defined below take precedence over them.
+#define DEF_ADD_DATA(CLASS, CODE) \
+ template <class = void> void Visit##CLASS(const CLASS *S) { \
+ CODE; \
+ ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
+ }
+
+#include "../AST/StmtDataCollectors.inc"
+
+// Type II clones ignore variable names and literals, so let's skip them.
+#define SKIP(CLASS) \
+ void Visit##CLASS(const CLASS *S) { \
+ ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
+ }
+ SKIP(DeclRefExpr)
+ SKIP(MemberExpr)
+ SKIP(IntegerLiteral)
+ SKIP(FloatingLiteral)
+ SKIP(StringLiteral)
+ SKIP(CXXBoolLiteralExpr)
+ SKIP(CharacterLiteral)
+#undef SKIP
+};
+} // end anonymous namespace
+
static size_t createHash(llvm::MD5 &Hash) {
size_t HashCode;
@@ -222,7 +244,7 @@
llvm::MD5 Hash;
ASTContext &Context = D->getASTContext();
- StmtDataCollector<llvm::MD5>(S, Context, Hash);
+ CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
auto CS = dyn_cast<CompoundStmt>(S);
SmallVector<size_t, 8> ChildHashes;
@@ -288,8 +310,8 @@
static void CollectStmtSequenceData(const StmtSequence &Sequence,
FoldingSetNodeIDWrapper &OutputData) {
for (const Stmt *S : Sequence) {
- StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(),
- OutputData);
+ CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
+ S, Sequence.getASTContext(), OutputData);
for (const Stmt *Child : S->children()) {
if (!Child)
@@ -339,7 +361,7 @@
// Sort hash_codes in StmtsByHash.
std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(),
[](std::pair<size_t, StmtSequence> LHS,
- std::pair<size_t, StmtSequence> RHS) {
+ std::pair<size_t, StmtSequence> RHS) {
return LHS.first < RHS.first;
});
@@ -393,8 +415,10 @@
ASTContext &Context = Seq.getASTContext();
// Look up what macros expanded into the current statement.
- std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context);
- std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context);
+ std::string StartMacroStack =
+ data_collection::getMacroStack(Seq.getStartLoc(), Context);
+ std::string EndMacroStack =
+ data_collection::getMacroStack(Seq.getEndLoc(), Context);
// First, check if ParentMacroStack is not empty which means we are currently
// dealing with a parent statement which was expanded from a macro.