Blame - clang/lib/Analysis/CloneDetection.cpp - toolchain/llvm-project

blob: e698d3e5c560308cbd9a4df98882bfa303332ba8 [file] [log] [blame]

Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	1	//===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	///
				10	/// This file implements classes for searching and analyzing source code clones.
				11	///
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "clang/Analysis/CloneDetection.h"
				15
				16	#include "clang/AST/ASTContext.h"
				17	#include "clang/AST/RecursiveASTVisitor.h"
				18	#include "clang/AST/Stmt.h"
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	19	#include "clang/Lex/Lexer.h"
Artem Dergachev	5657486	2016-08-20 17:35:53 +0000	[diff] [blame]	20	#include "llvm/Support/MD5.h"
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	21	#include "llvm/Support/raw_ostream.h"
Leslie Zhai	d91d19e	2017-06-19 01:55:50 +0000	[diff] [blame]	22	#include "llvm/Support/Path.h"
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	23
				24	using namespace clang;
Raphael Isemann	983f791	2017-07-09 15:56:39 +0000	[diff] [blame]	25	using namespace clang::clone_detection;
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	26
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	27	StmtSequence::StmtSequence(const CompoundStmt Stmt, const Decl D,
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	28	unsigned StartIndex, unsigned EndIndex)
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	29	: S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	30	assert(Stmt && "Stmt must not be a nullptr");
				31	assert(StartIndex < EndIndex && "Given array should not be empty");
				32	assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
				33	}
				34
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	35	StmtSequence::StmtSequence(const Stmt Stmt, const Decl D)
				36	: S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	37
				38	StmtSequence::StmtSequence()
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	39	: S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	40
				41	bool StmtSequence::contains(const StmtSequence &Other) const {
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	42	// If both sequences reside in different declarations, they can never contain
				43	// each other.
				44	if (D != Other.D)
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	45	return false;
				46
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	47	const SourceManager &SM = getASTContext().getSourceManager();
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	48
				49	// Otherwise check if the start and end locations of the current sequence
				50	// surround the other sequence.
				51	bool StartIsInBounds =
				52	SM.isBeforeInTranslationUnit(getStartLoc(), Other.getStartLoc()) \|\|
				53	getStartLoc() == Other.getStartLoc();
				54	if (!StartIsInBounds)
				55	return false;
				56
				57	bool EndIsInBounds =
				58	SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) \|\|
				59	Other.getEndLoc() == getEndLoc();
				60	return EndIsInBounds;
				61	}
				62
				63	StmtSequence::iterator StmtSequence::begin() const {
				64	if (!holdsSequence()) {
				65	return &S;
				66	}
				67	auto CS = cast<CompoundStmt>(S);
				68	return CS->body_begin() + StartIndex;
				69	}
				70
				71	StmtSequence::iterator StmtSequence::end() const {
				72	if (!holdsSequence()) {
Vassil Vassilev	5721e0f	2016-08-09 10:00:23 +0000	[diff] [blame]	73	return reinterpret_cast<StmtSequence::iterator>(&S) + 1;
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	74	}
				75	auto CS = cast<CompoundStmt>(S);
				76	return CS->body_begin() + EndIndex;
				77	}
				78
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	79	ASTContext &StmtSequence::getASTContext() const {
				80	assert(D);
				81	return D->getASTContext();
				82	}
				83
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	84	SourceLocation StmtSequence::getStartLoc() const {
				85	return front()->getLocStart();
				86	}
				87
				88	SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
				89
Artem Dergachev	4eca0de	2016-10-08 10:54:30 +0000	[diff] [blame]	90	SourceRange StmtSequence::getSourceRange() const {
				91	return SourceRange(getStartLoc(), getEndLoc());
				92	}
				93
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	94	/// Prints the macro name that contains the given SourceLocation into the given
				95	/// raw_string_ostream.
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	96	static void printMacroName(llvm::raw_string_ostream &MacroStack,
				97	ASTContext &Context, SourceLocation Loc) {
				98	MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
				99	Context.getLangOpts());
				100
				101	// Add an empty space at the end as a padding to prevent
				102	// that macro names concatenate to the names of other macros.
				103	MacroStack << " ";
				104	}
				105
Raphael Isemann	983f791	2017-07-09 15:56:39 +0000	[diff] [blame]	106	std::string clone_detection::getMacroStack(SourceLocation Loc,
				107	ASTContext &Context) {
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	108	std::string MacroStack;
				109	llvm::raw_string_ostream MacroStackStream(MacroStack);
				110	SourceManager &SM = Context.getSourceManager();
				111
				112	// Iterate over all macros that expanded into the given SourceLocation.
				113	while (Loc.isMacroID()) {
				114	// Add the macro name to the stream.
				115	printMacroName(MacroStackStream, Context, Loc);
				116	Loc = SM.getImmediateMacroCallerLoc(Loc);
				117	}
				118	MacroStackStream.flush();
				119	return MacroStack;
				120	}
				121
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	122	void CloneDetector::analyzeCodeBody(const Decl *D) {
				123	assert(D);
				124	assert(D->hasBody());
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	125
				126	Sequences.push_back(StmtSequence(D->getBody(), D));
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	127	}
				128
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	129	/// Returns true if and only if \p Stmt contains at least one other
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	130	/// sequence in the \p Group.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	131	static bool containsAnyInGroup(StmtSequence &Seq,
				132	CloneDetector::CloneGroup &Group) {
				133	for (StmtSequence &GroupSeq : Group) {
				134	if (Seq.contains(GroupSeq))
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	135	return true;
				136	}
				137	return false;
				138	}
				139
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	140	/// Returns true if and only if all sequences in \p OtherGroup are
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	141	/// contained by a sequence in \p Group.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	142	static bool containsGroup(CloneDetector::CloneGroup &Group,
				143	CloneDetector::CloneGroup &OtherGroup) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	144	// We have less sequences in the current group than we have in the other,
				145	// so we will never fulfill the requirement for returning true. This is only
				146	// possible because we know that a sequence in Group can contain at most
				147	// one sequence in OtherGroup.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	148	if (Group.size() < OtherGroup.size())
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	149	return false;
				150
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	151	for (StmtSequence &Stmt : Group) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	152	if (!containsAnyInGroup(Stmt, OtherGroup))
				153	return false;
				154	}
				155	return true;
				156	}
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	157
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	158	void OnlyLargestCloneConstraint::constrain(
				159	std::vector<CloneDetector::CloneGroup> &Result) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	160	std::vector<unsigned> IndexesToRemove;
				161
				162	// Compare every group in the result with the rest. If one groups contains
				163	// another group, we only need to return the bigger group.
				164	// Note: This doesn't scale well, so if possible avoid calling any heavy
				165	// function from this loop to minimize the performance impact.
				166	for (unsigned i = 0; i < Result.size(); ++i) {
				167	for (unsigned j = 0; j < Result.size(); ++j) {
				168	// Don't compare a group with itself.
				169	if (i == j)
				170	continue;
				171
				172	if (containsGroup(Result[j], Result[i])) {
				173	IndexesToRemove.push_back(i);
				174	break;
				175	}
				176	}
				177	}
				178
				179	// Erasing a list of indexes from the vector should be done with decreasing
				180	// indexes. As IndexesToRemove is constructed with increasing values, we just
				181	// reverse iterate over it to get the desired order.
				182	for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
				183	Result.erase(Result.begin() + *I);
				184	}
				185	}
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	186
Leslie Zhai	104b6fe	2017-06-20 06:44:46 +0000	[diff] [blame]	187	bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) {
Leslie Zhai	d91d19e	2017-06-19 01:55:50 +0000	[diff] [blame]	188	std::string Error;
				189	if (IgnoredFilesPattern.empty() \|\| Group.empty() \|\|
				190	!IgnoredFilesRegex->isValid(Error))
				191	return false;
				192
				193	for (const StmtSequence &S : Group) {
				194	const SourceManager &SM = S.getASTContext().getSourceManager();
				195	StringRef Filename = llvm::sys::path::filename(SM.getFilename(
				196	S.getContainingDecl()->getLocation()));
				197	if (IgnoredFilesRegex->match(Filename))
				198	return true;
				199	}
				200
				201	return false;
				202	}
				203
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	204	static size_t createHash(llvm::MD5 &Hash) {
				205	size_t HashCode;
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	206
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	207	// Create the final hash code for the current Stmt.
				208	llvm::MD5::MD5Result HashResult;
				209	Hash.final(HashResult);
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	210
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	211	// Copy as much as possible of the generated hash code to the Stmt's hash
				212	// code.
				213	std::memcpy(&HashCode, &HashResult,
				214	std::min(sizeof(HashCode), sizeof(HashResult)));
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	215
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	216	return HashCode;
				217	}
				218
				219	size_t RecursiveCloneTypeIIConstraint::saveHash(
				220	const Stmt S, const Decl D,
				221	std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
				222	llvm::MD5 Hash;
				223	ASTContext &Context = D->getASTContext();
				224
				225	StmtDataCollector<llvm::MD5>(S, Context, Hash);
				226
				227	auto CS = dyn_cast<CompoundStmt>(S);
				228	SmallVector<size_t, 8> ChildHashes;
				229
				230	for (const Stmt *Child : S->children()) {
				231	if (Child == nullptr) {
				232	ChildHashes.push_back(0);
				233	continue;
				234	}
				235	size_t ChildHash = saveHash(Child, D, StmtsByHash);
				236	Hash.update(
				237	StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
				238	ChildHashes.push_back(ChildHash);
				239	}
				240
				241	if (CS) {
				242	for (unsigned Length = 2; Length <= CS->size(); ++Length) {
				243	for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
				244	llvm::MD5 Hash;
				245	for (unsigned i = Pos; i < Pos + Length; ++i) {
				246	size_t ChildHash = ChildHashes[i];
				247	Hash.update(StringRef(reinterpret_cast<char *>(&ChildHash),
				248	sizeof(ChildHash)));
Artem Dergachev	f8b4fc3	2017-04-05 14:17:36 +0000	[diff] [blame]	249	}
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	250	StmtsByHash.push_back(std::make_pair(
				251	createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length)));
Artem Dergachev	f8b4fc3	2017-04-05 14:17:36 +0000	[diff] [blame]	252	}
Artem Dergachev	f8b4fc3	2017-04-05 14:17:36 +0000	[diff] [blame]	253	}
				254	}
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	255
				256	size_t HashCode = createHash(Hash);
				257	StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D)));
				258	return HashCode;
				259	}
				260
				261	namespace {
				262	/// Wrapper around FoldingSetNodeID that it can be used as the template
				263	/// argument of the StmtDataCollector.
				264	class FoldingSetNodeIDWrapper {
				265
				266	llvm::FoldingSetNodeID &FS;
				267
				268	public:
				269	FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
				270
				271	void update(StringRef Str) { FS.AddString(Str); }
				272	};
				273	} // end anonymous namespace
				274
				275	/// Writes the relevant data from all statements and child statements
				276	/// in the given StmtSequence into the given FoldingSetNodeID.
				277	static void CollectStmtSequenceData(const StmtSequence &Sequence,
				278	FoldingSetNodeIDWrapper &OutputData) {
				279	for (const Stmt *S : Sequence) {
				280	StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(),
				281	OutputData);
				282
				283	for (const Stmt *Child : S->children()) {
				284	if (!Child)
				285	continue;
				286
				287	CollectStmtSequenceData(StmtSequence(Child, Sequence.getContainingDecl()),
				288	OutputData);
				289	}
				290	}
				291	}
				292
				293	/// Returns true if both sequences are clones of each other.
				294	static bool areSequencesClones(const StmtSequence &LHS,
				295	const StmtSequence &RHS) {
				296	// We collect the data from all statements in the sequence as we did before
				297	// when generating a hash value for each sequence. But this time we don't
				298	// hash the collected data and compare the whole data set instead. This
				299	// prevents any false-positives due to hash code collisions.
				300	llvm::FoldingSetNodeID DataLHS, DataRHS;
				301	FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
				302	FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
				303
				304	CollectStmtSequenceData(LHS, LHSWrapper);
				305	CollectStmtSequenceData(RHS, RHSWrapper);
				306
				307	return DataLHS == DataRHS;
				308	}
				309
				310	void RecursiveCloneTypeIIConstraint::constrain(
				311	std::vector<CloneDetector::CloneGroup> &Sequences) {
				312	// FIXME: Maybe we can do this in-place and don't need this additional vector.
				313	std::vector<CloneDetector::CloneGroup> Result;
				314
				315	for (CloneDetector::CloneGroup &Group : Sequences) {
				316	// We assume in the following code that the Group is non-empty, so we
				317	// skip all empty groups.
				318	if (Group.empty())
				319	continue;
				320
				321	std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
				322
				323	// Generate hash codes for all children of S and save them in StmtsByHash.
				324	for (const StmtSequence &S : Group) {
				325	saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
				326	}
				327
				328	// Sort hash_codes in StmtsByHash.
				329	std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(),
Ivan Krasin	1e1acbc	2017-04-06 17:42:05 +0000	[diff] [blame]	330	[](std::pair<size_t, StmtSequence> LHS,
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	331	std::pair<size_t, StmtSequence> RHS) {
				332	return LHS.first < RHS.first;
				333	});
				334
				335	// Check for each StmtSequence if its successor has the same hash value.
				336	// We don't check the last StmtSequence as it has no successor.
				337	// Note: The 'size - 1 ' in the condition is safe because we check for an
				338	// empty Group vector at the beginning of this function.
				339	for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
				340	const auto Current = StmtsByHash[i];
				341
				342	// It's likely that we just found an sequence of StmtSequences that
				343	// represent a CloneGroup, so we create a new group and start checking and
				344	// adding the StmtSequences in this sequence.
				345	CloneDetector::CloneGroup NewGroup;
				346
				347	size_t PrototypeHash = Current.first;
				348
				349	for (; i < StmtsByHash.size(); ++i) {
				350	// A different hash value means we have reached the end of the sequence.
				351	if (PrototypeHash != StmtsByHash[i].first \|\|
				352	!areSequencesClones(StmtsByHash[i].second, Current.second)) {
				353	// The current sequence could be the start of a new CloneGroup. So we
				354	// decrement i so that we visit it again in the outer loop.
				355	// Note: i can never be 0 at this point because we are just comparing
				356	// the hash of the Current StmtSequence with itself in the 'if' above.
				357	assert(i != 0);
				358	--i;
				359	break;
				360	}
				361	// Same hash value means we should add the StmtSequence to the current
				362	// group.
				363	NewGroup.push_back(StmtsByHash[i].second);
				364	}
				365
				366	// We created a new clone group with matching hash codes and move it to
				367	// the result vector.
				368	Result.push_back(NewGroup);
				369	}
				370	}
				371	// Sequences is the output parameter, so we copy our result into it.
				372	Sequences = Result;
				373	}
				374
				375	size_t MinComplexityConstraint::calculateStmtComplexity(
				376	const StmtSequence &Seq, const std::string &ParentMacroStack) {
				377	if (Seq.empty())
				378	return 0;
				379
				380	size_t Complexity = 1;
				381
				382	ASTContext &Context = Seq.getASTContext();
				383
				384	// Look up what macros expanded into the current statement.
				385	std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context);
				386	std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context);
				387
				388	// First, check if ParentMacroStack is not empty which means we are currently
				389	// dealing with a parent statement which was expanded from a macro.
				390	// If this parent statement was expanded from the same macros as this
				391	// statement, we reduce the initial complexity of this statement to zero.
				392	// This causes that a group of statements that were generated by a single
				393	// macro expansion will only increase the total complexity by one.
				394	// Note: This is not the final complexity of this statement as we still
				395	// add the complexity of the child statements to the complexity value.
				396	if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack &&
				397	EndMacroStack == ParentMacroStack)) {
				398	Complexity = 0;
				399	}
				400
				401	// Iterate over the Stmts in the StmtSequence and add their complexity values
				402	// to the current complexity value.
				403	if (Seq.holdsSequence()) {
				404	for (const Stmt *S : Seq) {
				405	Complexity += calculateStmtComplexity(
				406	StmtSequence(S, Seq.getContainingDecl()), StartMacroStack);
				407	}
				408	} else {
				409	for (const Stmt *S : Seq.front()->children()) {
				410	Complexity += calculateStmtComplexity(
				411	StmtSequence(S, Seq.getContainingDecl()), StartMacroStack);
				412	}
				413	}
				414	return Complexity;
				415	}
				416
				417	void MatchingVariablePatternConstraint::constrain(
				418	std::vector<CloneDetector::CloneGroup> &CloneGroups) {
				419	CloneConstraint::splitCloneGroups(
				420	CloneGroups, [](const StmtSequence &A, const StmtSequence &B) {
				421	VariablePattern PatternA(A);
				422	VariablePattern PatternB(B);
				423	return PatternA.countPatternDifferences(PatternB) == 0;
				424	});
				425	}
				426
				427	void CloneConstraint::splitCloneGroups(
				428	std::vector<CloneDetector::CloneGroup> &CloneGroups,
				429	std::function<bool(const StmtSequence &, const StmtSequence &)> Compare) {
				430	std::vector<CloneDetector::CloneGroup> Result;
				431	for (auto &HashGroup : CloneGroups) {
				432	// Contains all indexes in HashGroup that were already added to a
				433	// CloneGroup.
				434	std::vector<char> Indexes;
				435	Indexes.resize(HashGroup.size());
				436
				437	for (unsigned i = 0; i < HashGroup.size(); ++i) {
				438	// Skip indexes that are already part of a CloneGroup.
				439	if (Indexes[i])
				440	continue;
				441
				442	// Pick the first unhandled StmtSequence and consider it as the
				443	// beginning
				444	// of a new CloneGroup for now.
				445	// We don't add i to Indexes because we never iterate back.
				446	StmtSequence Prototype = HashGroup[i];
				447	CloneDetector::CloneGroup PotentialGroup = {Prototype};
				448	++Indexes[i];
				449
				450	// Check all following StmtSequences for clones.
				451	for (unsigned j = i + 1; j < HashGroup.size(); ++j) {
				452	// Skip indexes that are already part of a CloneGroup.
				453	if (Indexes[j])
				454	continue;
				455
Raphael Isemann	676b457	2017-06-21 05:41:39 +0000	[diff] [blame]	456	// If a following StmtSequence belongs to our CloneGroup, we add it.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	457	const StmtSequence &Candidate = HashGroup[j];
				458
				459	if (!Compare(Prototype, Candidate))
				460	continue;
				461
				462	PotentialGroup.push_back(Candidate);
				463	// Make sure we never visit this StmtSequence again.
				464	++Indexes[j];
				465	}
				466
				467	// Otherwise, add it to the result and continue searching for more
				468	// groups.
				469	Result.push_back(PotentialGroup);
				470	}
				471
				472	assert(std::all_of(Indexes.begin(), Indexes.end(),
				473	[](char c) { return c == 1; }));
				474	}
				475	CloneGroups = Result;
				476	}
				477
				478	void VariablePattern::addVariableOccurence(const VarDecl *VarDecl,
				479	const Stmt *Mention) {
				480	// First check if we already reference this variable
				481	for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
				482	if (Variables[KindIndex] == VarDecl) {
				483	// If yes, add a new occurence that points to the existing entry in
				484	// the Variables vector.
				485	Occurences.emplace_back(KindIndex, Mention);
				486	return;
				487	}
				488	}
				489	// If this variable wasn't already referenced, add it to the list of
				490	// referenced variables and add a occurence that points to this new entry.
				491	Occurences.emplace_back(Variables.size(), Mention);
				492	Variables.push_back(VarDecl);
				493	}
				494
				495	void VariablePattern::addVariables(const Stmt *S) {
				496	// Sometimes we get a nullptr (such as from IfStmts which often have nullptr
				497	// children). We skip such statements as they don't reference any
				498	// variables.
				499	if (!S)
				500	return;
				501
				502	// Check if S is a reference to a variable. If yes, add it to the pattern.
				503	if (auto D = dyn_cast<DeclRefExpr>(S)) {
				504	if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
				505	addVariableOccurence(VD, D);
				506	}
				507
				508	// Recursively check all children of the given statement.
				509	for (const Stmt *Child : S->children()) {
				510	addVariables(Child);
				511	}
				512	}
				513
				514	unsigned VariablePattern::countPatternDifferences(
				515	const VariablePattern &Other,
				516	VariablePattern::SuspiciousClonePair *FirstMismatch) {
				517	unsigned NumberOfDifferences = 0;
				518
				519	assert(Other.Occurences.size() == Occurences.size());
				520	for (unsigned i = 0; i < Occurences.size(); ++i) {
				521	auto ThisOccurence = Occurences[i];
				522	auto OtherOccurence = Other.Occurences[i];
				523	if (ThisOccurence.KindID == OtherOccurence.KindID)
				524	continue;
				525
				526	++NumberOfDifferences;
				527
				528	// If FirstMismatch is not a nullptr, we need to store information about
				529	// the first difference between the two patterns.
				530	if (FirstMismatch == nullptr)
				531	continue;
				532
				533	// Only proceed if we just found the first difference as we only store
				534	// information about the first difference.
				535	if (NumberOfDifferences != 1)
				536	continue;
				537
				538	const VarDecl *FirstSuggestion = nullptr;
				539	// If there is a variable available in the list of referenced variables
				540	// which wouldn't break the pattern if it is used in place of the
				541	// current variable, we provide this variable as the suggested fix.
				542	if (OtherOccurence.KindID < Variables.size())
				543	FirstSuggestion = Variables[OtherOccurence.KindID];
				544
				545	// Store information about the first clone.
				546	FirstMismatch->FirstCloneInfo =
				547	VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
				548	Variables[ThisOccurence.KindID], ThisOccurence.Mention,
				549	FirstSuggestion);
				550
				551	// Same as above but with the other clone. We do this for both clones as
				552	// we don't know which clone is the one containing the unintended
				553	// pattern error.
				554	const VarDecl *SecondSuggestion = nullptr;
				555	if (ThisOccurence.KindID < Other.Variables.size())
				556	SecondSuggestion = Other.Variables[ThisOccurence.KindID];
				557
				558	// Store information about the second clone.
				559	FirstMismatch->SecondCloneInfo =
				560	VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
				561	Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
				562	SecondSuggestion);
				563
				564	// SuspiciousClonePair guarantees that the first clone always has a
				565	// suggested variable associated with it. As we know that one of the two
				566	// clones in the pair always has suggestion, we swap the two clones
				567	// in case the first clone has no suggested variable which means that
				568	// the second clone has a suggested variable and should be first.
				569	if (!FirstMismatch->FirstCloneInfo.Suggestion)
				570	std::swap(FirstMismatch->FirstCloneInfo, FirstMismatch->SecondCloneInfo);
				571
				572	// This ensures that we always have at least one suggestion in a pair.
				573	assert(FirstMismatch->FirstCloneInfo.Suggestion);
				574	}
				575
				576	return NumberOfDifferences;
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	577	}