Blame - clang/lib/Analysis/CloneDetection.cpp - toolchain/llvm-project

blob: 5ea74989a7ec999d1be02a848592163f2b1d7ba3 [file] [log] [blame]

Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	1	//===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	///
				10	/// This file implements classes for searching and analyzing source code clones.
				11	///
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "clang/Analysis/CloneDetection.h"
				15
				16	#include "clang/AST/ASTContext.h"
				17	#include "clang/AST/RecursiveASTVisitor.h"
				18	#include "clang/AST/Stmt.h"
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	19	#include "clang/Lex/Lexer.h"
Artem Dergachev	5657486	2016-08-20 17:35:53 +0000	[diff] [blame]	20	#include "llvm/Support/MD5.h"
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	21	#include "llvm/Support/raw_ostream.h"
Leslie Zhai	d91d19e	2017-06-19 01:55:50 +0000	[diff] [blame]	22	#include "llvm/Support/Path.h"
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	23
				24	using namespace clang;
Raphael Isemann	983f791	2017-07-09 15:56:39 +0000	[diff] [blame]	25	using namespace clang::clone_detection;
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	26
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	27	StmtSequence::StmtSequence(const CompoundStmt Stmt, const Decl D,
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	28	unsigned StartIndex, unsigned EndIndex)
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	29	: S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	30	assert(Stmt && "Stmt must not be a nullptr");
				31	assert(StartIndex < EndIndex && "Given array should not be empty");
				32	assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
				33	}
				34
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	35	StmtSequence::StmtSequence(const Stmt Stmt, const Decl D)
				36	: S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	37
				38	StmtSequence::StmtSequence()
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	39	: S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	40
				41	bool StmtSequence::contains(const StmtSequence &Other) const {
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	42	// If both sequences reside in different declarations, they can never contain
				43	// each other.
				44	if (D != Other.D)
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	45	return false;
				46
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	47	const SourceManager &SM = getASTContext().getSourceManager();
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	48
				49	// Otherwise check if the start and end locations of the current sequence
				50	// surround the other sequence.
				51	bool StartIsInBounds =
				52	SM.isBeforeInTranslationUnit(getStartLoc(), Other.getStartLoc()) \|\|
				53	getStartLoc() == Other.getStartLoc();
				54	if (!StartIsInBounds)
				55	return false;
				56
				57	bool EndIsInBounds =
				58	SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) \|\|
				59	Other.getEndLoc() == getEndLoc();
				60	return EndIsInBounds;
				61	}
				62
				63	StmtSequence::iterator StmtSequence::begin() const {
				64	if (!holdsSequence()) {
				65	return &S;
				66	}
				67	auto CS = cast<CompoundStmt>(S);
				68	return CS->body_begin() + StartIndex;
				69	}
				70
				71	StmtSequence::iterator StmtSequence::end() const {
				72	if (!holdsSequence()) {
Vassil Vassilev	5721e0f	2016-08-09 10:00:23 +0000	[diff] [blame]	73	return reinterpret_cast<StmtSequence::iterator>(&S) + 1;
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	74	}
				75	auto CS = cast<CompoundStmt>(S);
				76	return CS->body_begin() + EndIndex;
				77	}
				78
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	79	ASTContext &StmtSequence::getASTContext() const {
				80	assert(D);
				81	return D->getASTContext();
				82	}
				83
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	84	SourceLocation StmtSequence::getStartLoc() const {
				85	return front()->getLocStart();
				86	}
				87
				88	SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
				89
Artem Dergachev	4eca0de	2016-10-08 10:54:30 +0000	[diff] [blame]	90	SourceRange StmtSequence::getSourceRange() const {
				91	return SourceRange(getStartLoc(), getEndLoc());
				92	}
				93
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	94	/// Prints the macro name that contains the given SourceLocation into the given
				95	/// raw_string_ostream.
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	96	static void printMacroName(llvm::raw_string_ostream &MacroStack,
				97	ASTContext &Context, SourceLocation Loc) {
				98	MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
				99	Context.getLangOpts());
				100
				101	// Add an empty space at the end as a padding to prevent
				102	// that macro names concatenate to the names of other macros.
				103	MacroStack << " ";
				104	}
				105
Raphael Isemann	983f791	2017-07-09 15:56:39 +0000	[diff] [blame]	106	std::string clone_detection::getMacroStack(SourceLocation Loc,
				107	ASTContext &Context) {
Artem Dergachev	51b9a0e	2016-08-20 10:06:59 +0000	[diff] [blame]	108	std::string MacroStack;
				109	llvm::raw_string_ostream MacroStackStream(MacroStack);
				110	SourceManager &SM = Context.getSourceManager();
				111
				112	// Iterate over all macros that expanded into the given SourceLocation.
				113	while (Loc.isMacroID()) {
				114	// Add the macro name to the stream.
				115	printMacroName(MacroStackStream, Context, Loc);
				116	Loc = SM.getImmediateMacroCallerLoc(Loc);
				117	}
				118	MacroStackStream.flush();
				119	return MacroStack;
				120	}
				121
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	122	void CloneDetector::analyzeCodeBody(const Decl *D) {
				123	assert(D);
				124	assert(D->hasBody());
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	125
				126	Sequences.push_back(StmtSequence(D->getBody(), D));
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	127	}
				128
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	129	/// Returns true if and only if \p Stmt contains at least one other
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	130	/// sequence in the \p Group.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	131	static bool containsAnyInGroup(StmtSequence &Seq,
				132	CloneDetector::CloneGroup &Group) {
				133	for (StmtSequence &GroupSeq : Group) {
				134	if (Seq.contains(GroupSeq))
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	135	return true;
				136	}
				137	return false;
				138	}
				139
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	140	/// Returns true if and only if all sequences in \p OtherGroup are
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	141	/// contained by a sequence in \p Group.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	142	static bool containsGroup(CloneDetector::CloneGroup &Group,
				143	CloneDetector::CloneGroup &OtherGroup) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	144	// We have less sequences in the current group than we have in the other,
				145	// so we will never fulfill the requirement for returning true. This is only
				146	// possible because we know that a sequence in Group can contain at most
				147	// one sequence in OtherGroup.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	148	if (Group.size() < OtherGroup.size())
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	149	return false;
				150
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	151	for (StmtSequence &Stmt : Group) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	152	if (!containsAnyInGroup(Stmt, OtherGroup))
				153	return false;
				154	}
				155	return true;
				156	}
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	157
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	158	void OnlyLargestCloneConstraint::constrain(
				159	std::vector<CloneDetector::CloneGroup> &Result) {
Artem Dergachev	ba81632	2016-07-26 18:13:12 +0000	[diff] [blame]	160	std::vector<unsigned> IndexesToRemove;
				161
				162	// Compare every group in the result with the rest. If one groups contains
				163	// another group, we only need to return the bigger group.
				164	// Note: This doesn't scale well, so if possible avoid calling any heavy
				165	// function from this loop to minimize the performance impact.
				166	for (unsigned i = 0; i < Result.size(); ++i) {
				167	for (unsigned j = 0; j < Result.size(); ++j) {
				168	// Don't compare a group with itself.
				169	if (i == j)
				170	continue;
				171
				172	if (containsGroup(Result[j], Result[i])) {
				173	IndexesToRemove.push_back(i);
				174	break;
				175	}
				176	}
				177	}
				178
				179	// Erasing a list of indexes from the vector should be done with decreasing
				180	// indexes. As IndexesToRemove is constructed with increasing values, we just
				181	// reverse iterate over it to get the desired order.
				182	for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
				183	Result.erase(Result.begin() + *I);
				184	}
				185	}
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	186
Leslie Zhai	104b6fe	2017-06-20 06:44:46 +0000	[diff] [blame]	187	bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) {
Leslie Zhai	d91d19e	2017-06-19 01:55:50 +0000	[diff] [blame]	188	std::string Error;
				189	if (IgnoredFilesPattern.empty() \|\| Group.empty() \|\|
				190	!IgnoredFilesRegex->isValid(Error))
				191	return false;
				192
				193	for (const StmtSequence &S : Group) {
				194	const SourceManager &SM = S.getASTContext().getSourceManager();
				195	StringRef Filename = llvm::sys::path::filename(SM.getFilename(
				196	S.getContainingDecl()->getLocation()));
				197	if (IgnoredFilesRegex->match(Filename))
				198	return true;
				199	}
				200
				201	return false;
				202	}
				203
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	204	static size_t createHash(llvm::MD5 &Hash) {
				205	size_t HashCode;
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	206
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	207	// Create the final hash code for the current Stmt.
				208	llvm::MD5::MD5Result HashResult;
				209	Hash.final(HashResult);
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	210
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	211	// Copy as much as possible of the generated hash code to the Stmt's hash
				212	// code.
				213	std::memcpy(&HashCode, &HashResult,
				214	std::min(sizeof(HashCode), sizeof(HashResult)));
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	215
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	216	return HashCode;
				217	}
				218
				219	size_t RecursiveCloneTypeIIConstraint::saveHash(
				220	const Stmt S, const Decl D,
				221	std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
				222	llvm::MD5 Hash;
				223	ASTContext &Context = D->getASTContext();
				224
				225	StmtDataCollector<llvm::MD5>(S, Context, Hash);
				226
				227	auto CS = dyn_cast<CompoundStmt>(S);
				228	SmallVector<size_t, 8> ChildHashes;
				229
				230	for (const Stmt *Child : S->children()) {
				231	if (Child == nullptr) {
				232	ChildHashes.push_back(0);
				233	continue;
				234	}
				235	size_t ChildHash = saveHash(Child, D, StmtsByHash);
				236	Hash.update(
				237	StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
				238	ChildHashes.push_back(ChildHash);
				239	}
				240
				241	if (CS) {
Raphael Isemann	4eac9f0	2017-07-09 21:14:36 +0000	[diff] [blame^]	242	// If we're in a CompoundStmt, we hash all possible combinations of child
				243	// statements to find clones in those subsequences.
				244	// We first go through every possible starting position of a subsequence.
				245	for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {
				246	// Then we try all possible lengths this subsequence could have and
				247	// reuse the same hash object to make sure we only hash every child
				248	// hash exactly once.
				249	llvm::MD5 Hash;
				250	for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
				251	// Grab the current child hash and put it into our hash. We do
				252	// -1 on the index because we start counting the length at 1.
				253	size_t ChildHash = ChildHashes[Pos + Length - 1];
				254	Hash.update(
				255	StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
				256	// If we have at least two elements in our subsequence, we can start
				257	// saving it.
				258	if (Length > 1) {
				259	llvm::MD5 SubHash = Hash;
				260	StmtsByHash.push_back(std::make_pair(
				261	createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length)));
Artem Dergachev	f8b4fc3	2017-04-05 14:17:36 +0000	[diff] [blame]	262	}
Artem Dergachev	f8b4fc3	2017-04-05 14:17:36 +0000	[diff] [blame]	263	}
Artem Dergachev	f8b4fc3	2017-04-05 14:17:36 +0000	[diff] [blame]	264	}
				265	}
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	266
				267	size_t HashCode = createHash(Hash);
				268	StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D)));
				269	return HashCode;
				270	}
				271
				272	namespace {
				273	/// Wrapper around FoldingSetNodeID that it can be used as the template
				274	/// argument of the StmtDataCollector.
				275	class FoldingSetNodeIDWrapper {
				276
				277	llvm::FoldingSetNodeID &FS;
				278
				279	public:
				280	FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
				281
				282	void update(StringRef Str) { FS.AddString(Str); }
				283	};
				284	} // end anonymous namespace
				285
				286	/// Writes the relevant data from all statements and child statements
				287	/// in the given StmtSequence into the given FoldingSetNodeID.
				288	static void CollectStmtSequenceData(const StmtSequence &Sequence,
				289	FoldingSetNodeIDWrapper &OutputData) {
				290	for (const Stmt *S : Sequence) {
				291	StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(),
				292	OutputData);
				293
				294	for (const Stmt *Child : S->children()) {
				295	if (!Child)
				296	continue;
				297
				298	CollectStmtSequenceData(StmtSequence(Child, Sequence.getContainingDecl()),
				299	OutputData);
				300	}
				301	}
				302	}
				303
				304	/// Returns true if both sequences are clones of each other.
				305	static bool areSequencesClones(const StmtSequence &LHS,
				306	const StmtSequence &RHS) {
				307	// We collect the data from all statements in the sequence as we did before
				308	// when generating a hash value for each sequence. But this time we don't
				309	// hash the collected data and compare the whole data set instead. This
				310	// prevents any false-positives due to hash code collisions.
				311	llvm::FoldingSetNodeID DataLHS, DataRHS;
				312	FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
				313	FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
				314
				315	CollectStmtSequenceData(LHS, LHSWrapper);
				316	CollectStmtSequenceData(RHS, RHSWrapper);
				317
				318	return DataLHS == DataRHS;
				319	}
				320
				321	void RecursiveCloneTypeIIConstraint::constrain(
				322	std::vector<CloneDetector::CloneGroup> &Sequences) {
				323	// FIXME: Maybe we can do this in-place and don't need this additional vector.
				324	std::vector<CloneDetector::CloneGroup> Result;
				325
				326	for (CloneDetector::CloneGroup &Group : Sequences) {
				327	// We assume in the following code that the Group is non-empty, so we
				328	// skip all empty groups.
				329	if (Group.empty())
				330	continue;
				331
				332	std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
				333
				334	// Generate hash codes for all children of S and save them in StmtsByHash.
				335	for (const StmtSequence &S : Group) {
				336	saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
				337	}
				338
				339	// Sort hash_codes in StmtsByHash.
				340	std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(),
Ivan Krasin	1e1acbc	2017-04-06 17:42:05 +0000	[diff] [blame]	341	[](std::pair<size_t, StmtSequence> LHS,
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	342	std::pair<size_t, StmtSequence> RHS) {
				343	return LHS.first < RHS.first;
				344	});
				345
				346	// Check for each StmtSequence if its successor has the same hash value.
				347	// We don't check the last StmtSequence as it has no successor.
				348	// Note: The 'size - 1 ' in the condition is safe because we check for an
				349	// empty Group vector at the beginning of this function.
				350	for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
				351	const auto Current = StmtsByHash[i];
				352
				353	// It's likely that we just found an sequence of StmtSequences that
				354	// represent a CloneGroup, so we create a new group and start checking and
				355	// adding the StmtSequences in this sequence.
				356	CloneDetector::CloneGroup NewGroup;
				357
				358	size_t PrototypeHash = Current.first;
				359
				360	for (; i < StmtsByHash.size(); ++i) {
				361	// A different hash value means we have reached the end of the sequence.
				362	if (PrototypeHash != StmtsByHash[i].first \|\|
				363	!areSequencesClones(StmtsByHash[i].second, Current.second)) {
				364	// The current sequence could be the start of a new CloneGroup. So we
				365	// decrement i so that we visit it again in the outer loop.
				366	// Note: i can never be 0 at this point because we are just comparing
				367	// the hash of the Current StmtSequence with itself in the 'if' above.
				368	assert(i != 0);
				369	--i;
				370	break;
				371	}
				372	// Same hash value means we should add the StmtSequence to the current
				373	// group.
				374	NewGroup.push_back(StmtsByHash[i].second);
				375	}
				376
				377	// We created a new clone group with matching hash codes and move it to
				378	// the result vector.
				379	Result.push_back(NewGroup);
				380	}
				381	}
				382	// Sequences is the output parameter, so we copy our result into it.
				383	Sequences = Result;
				384	}
				385
				386	size_t MinComplexityConstraint::calculateStmtComplexity(
				387	const StmtSequence &Seq, const std::string &ParentMacroStack) {
				388	if (Seq.empty())
				389	return 0;
				390
				391	size_t Complexity = 1;
				392
				393	ASTContext &Context = Seq.getASTContext();
				394
				395	// Look up what macros expanded into the current statement.
				396	std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context);
				397	std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context);
				398
				399	// First, check if ParentMacroStack is not empty which means we are currently
				400	// dealing with a parent statement which was expanded from a macro.
				401	// If this parent statement was expanded from the same macros as this
				402	// statement, we reduce the initial complexity of this statement to zero.
				403	// This causes that a group of statements that were generated by a single
				404	// macro expansion will only increase the total complexity by one.
				405	// Note: This is not the final complexity of this statement as we still
				406	// add the complexity of the child statements to the complexity value.
				407	if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack &&
				408	EndMacroStack == ParentMacroStack)) {
				409	Complexity = 0;
				410	}
				411
				412	// Iterate over the Stmts in the StmtSequence and add their complexity values
				413	// to the current complexity value.
				414	if (Seq.holdsSequence()) {
				415	for (const Stmt *S : Seq) {
				416	Complexity += calculateStmtComplexity(
				417	StmtSequence(S, Seq.getContainingDecl()), StartMacroStack);
				418	}
				419	} else {
				420	for (const Stmt *S : Seq.front()->children()) {
				421	Complexity += calculateStmtComplexity(
				422	StmtSequence(S, Seq.getContainingDecl()), StartMacroStack);
				423	}
				424	}
				425	return Complexity;
				426	}
				427
				428	void MatchingVariablePatternConstraint::constrain(
				429	std::vector<CloneDetector::CloneGroup> &CloneGroups) {
				430	CloneConstraint::splitCloneGroups(
				431	CloneGroups, [](const StmtSequence &A, const StmtSequence &B) {
				432	VariablePattern PatternA(A);
				433	VariablePattern PatternB(B);
				434	return PatternA.countPatternDifferences(PatternB) == 0;
				435	});
				436	}
				437
				438	void CloneConstraint::splitCloneGroups(
				439	std::vector<CloneDetector::CloneGroup> &CloneGroups,
				440	std::function<bool(const StmtSequence &, const StmtSequence &)> Compare) {
				441	std::vector<CloneDetector::CloneGroup> Result;
				442	for (auto &HashGroup : CloneGroups) {
				443	// Contains all indexes in HashGroup that were already added to a
				444	// CloneGroup.
				445	std::vector<char> Indexes;
				446	Indexes.resize(HashGroup.size());
				447
				448	for (unsigned i = 0; i < HashGroup.size(); ++i) {
				449	// Skip indexes that are already part of a CloneGroup.
				450	if (Indexes[i])
				451	continue;
				452
				453	// Pick the first unhandled StmtSequence and consider it as the
				454	// beginning
				455	// of a new CloneGroup for now.
				456	// We don't add i to Indexes because we never iterate back.
				457	StmtSequence Prototype = HashGroup[i];
				458	CloneDetector::CloneGroup PotentialGroup = {Prototype};
				459	++Indexes[i];
				460
				461	// Check all following StmtSequences for clones.
				462	for (unsigned j = i + 1; j < HashGroup.size(); ++j) {
				463	// Skip indexes that are already part of a CloneGroup.
				464	if (Indexes[j])
				465	continue;
				466
Raphael Isemann	676b457	2017-06-21 05:41:39 +0000	[diff] [blame]	467	// If a following StmtSequence belongs to our CloneGroup, we add it.
Artem Dergachev	da9e718	2017-04-06 14:34:07 +0000	[diff] [blame]	468	const StmtSequence &Candidate = HashGroup[j];
				469
				470	if (!Compare(Prototype, Candidate))
				471	continue;
				472
				473	PotentialGroup.push_back(Candidate);
				474	// Make sure we never visit this StmtSequence again.
				475	++Indexes[j];
				476	}
				477
				478	// Otherwise, add it to the result and continue searching for more
				479	// groups.
				480	Result.push_back(PotentialGroup);
				481	}
				482
				483	assert(std::all_of(Indexes.begin(), Indexes.end(),
				484	[](char c) { return c == 1; }));
				485	}
				486	CloneGroups = Result;
				487	}
				488
				489	void VariablePattern::addVariableOccurence(const VarDecl *VarDecl,
				490	const Stmt *Mention) {
				491	// First check if we already reference this variable
				492	for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
				493	if (Variables[KindIndex] == VarDecl) {
				494	// If yes, add a new occurence that points to the existing entry in
				495	// the Variables vector.
				496	Occurences.emplace_back(KindIndex, Mention);
				497	return;
				498	}
				499	}
				500	// If this variable wasn't already referenced, add it to the list of
				501	// referenced variables and add a occurence that points to this new entry.
				502	Occurences.emplace_back(Variables.size(), Mention);
				503	Variables.push_back(VarDecl);
				504	}
				505
				506	void VariablePattern::addVariables(const Stmt *S) {
				507	// Sometimes we get a nullptr (such as from IfStmts which often have nullptr
				508	// children). We skip such statements as they don't reference any
				509	// variables.
				510	if (!S)
				511	return;
				512
				513	// Check if S is a reference to a variable. If yes, add it to the pattern.
				514	if (auto D = dyn_cast<DeclRefExpr>(S)) {
				515	if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
				516	addVariableOccurence(VD, D);
				517	}
				518
				519	// Recursively check all children of the given statement.
				520	for (const Stmt *Child : S->children()) {
				521	addVariables(Child);
				522	}
				523	}
				524
				525	unsigned VariablePattern::countPatternDifferences(
				526	const VariablePattern &Other,
				527	VariablePattern::SuspiciousClonePair *FirstMismatch) {
				528	unsigned NumberOfDifferences = 0;
				529
				530	assert(Other.Occurences.size() == Occurences.size());
				531	for (unsigned i = 0; i < Occurences.size(); ++i) {
				532	auto ThisOccurence = Occurences[i];
				533	auto OtherOccurence = Other.Occurences[i];
				534	if (ThisOccurence.KindID == OtherOccurence.KindID)
				535	continue;
				536
				537	++NumberOfDifferences;
				538
				539	// If FirstMismatch is not a nullptr, we need to store information about
				540	// the first difference between the two patterns.
				541	if (FirstMismatch == nullptr)
				542	continue;
				543
				544	// Only proceed if we just found the first difference as we only store
				545	// information about the first difference.
				546	if (NumberOfDifferences != 1)
				547	continue;
				548
				549	const VarDecl *FirstSuggestion = nullptr;
				550	// If there is a variable available in the list of referenced variables
				551	// which wouldn't break the pattern if it is used in place of the
				552	// current variable, we provide this variable as the suggested fix.
				553	if (OtherOccurence.KindID < Variables.size())
				554	FirstSuggestion = Variables[OtherOccurence.KindID];
				555
				556	// Store information about the first clone.
				557	FirstMismatch->FirstCloneInfo =
				558	VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
				559	Variables[ThisOccurence.KindID], ThisOccurence.Mention,
				560	FirstSuggestion);
				561
				562	// Same as above but with the other clone. We do this for both clones as
				563	// we don't know which clone is the one containing the unintended
				564	// pattern error.
				565	const VarDecl *SecondSuggestion = nullptr;
				566	if (ThisOccurence.KindID < Other.Variables.size())
				567	SecondSuggestion = Other.Variables[ThisOccurence.KindID];
				568
				569	// Store information about the second clone.
				570	FirstMismatch->SecondCloneInfo =
				571	VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
				572	Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
				573	SecondSuggestion);
				574
				575	// SuspiciousClonePair guarantees that the first clone always has a
				576	// suggested variable associated with it. As we know that one of the two
				577	// clones in the pair always has suggestion, we swap the two clones
				578	// in case the first clone has no suggested variable which means that
				579	// the second clone has a suggested variable and should be first.
				580	if (!FirstMismatch->FirstCloneInfo.Suggestion)
				581	std::swap(FirstMismatch->FirstCloneInfo, FirstMismatch->SecondCloneInfo);
				582
				583	// This ensures that we always have at least one suggestion in a pair.
				584	assert(FirstMismatch->FirstCloneInfo.Suggestion);
				585	}
				586
				587	return NumberOfDifferences;
Artem Dergachev	2fc1985	2016-08-18 12:29:41 +0000	[diff] [blame]	588	}