blob: e5f9ebd4862650c36df89b87a25fb78a540c2b85 [file] [log] [blame]
Kristof Umann7f7dd092019-05-15 15:06:25 +00001//===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "BranchCloneCheck.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/ASTMatchers/ASTMatchFinder.h"
13#include "clang/Analysis/CloneDetection.h"
14#include "llvm/Support/Casting.h"
15
16using namespace clang;
17using namespace clang::ast_matchers;
18
19/// Returns true when the statements are Type I clones of each other.
20static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS,
21 const ASTContext &Context) {
22 llvm::FoldingSetNodeID DataLHS, DataRHS;
23 LHS->Profile(DataLHS, Context, false);
24 RHS->Profile(DataRHS, Context, false);
25 return (DataLHS == DataRHS);
26}
27
28namespace {
29/// A branch in a switch may consist of several statements; while a branch in
30/// an if/else if/else chain is one statement (which may be a CompoundStmt).
31using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
32} // anonymous namespace
33
34/// Determines if the bodies of two branches in a switch statements are Type I
35/// clones of each other. This function only examines the body of the branch
36/// and ignores the `case X:` or `default:` at the start of the branch.
37static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
38 const SwitchBranch RHS,
39 const ASTContext &Context) {
40 if (LHS.size() != RHS.size())
41 return false;
42
43 for (size_t i = 0, Size = LHS.size(); i < Size; i++) {
44 // NOTE: We strip goto labels and annotations in addition to stripping
45 // the `case X:` or `default:` labels, but it is very unlikely that this
46 // would casue false positives in real-world code.
47 if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(),
48 RHS[i]->stripLabelLikeStatements(), Context)) {
49 return false;
50 }
51 }
52
53 return true;
54}
55
56namespace clang {
57namespace tidy {
58namespace bugprone {
59
60void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
61 Finder->addMatcher(
Nathanf9c46222019-12-31 09:57:16 +000062 ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
63 stmt().bind("if"),
Kristof Umann7f7dd092019-05-15 15:06:25 +000064 hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
65 hasElse(stmt().bind("else"))),
66 this);
67 Finder->addMatcher(switchStmt().bind("switch"), this);
68 Finder->addMatcher(conditionalOperator().bind("condOp"), this);
69}
70
71void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
72 const ASTContext &Context = *Result.Context;
73
74 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
75 const Stmt *Then = IS->getThen();
76 assert(Then && "An IfStmt must have a `then` branch!");
77
78 const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
79 assert(Else && "We only look for `if` statements with an `else` branch!");
80
81 if (!isa<IfStmt>(Else)) {
82 // Just a simple if with no `else if` branch.
83 if (areStatementsIdentical(Then->IgnoreContainers(),
84 Else->IgnoreContainers(), Context)) {
85 diag(IS->getBeginLoc(), "if with identical then and else branches");
86 diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
87 }
88 return;
89 }
90
91 // This is the complicated case when we start an if/else if/else chain.
92 // To find all the duplicates, we collect all the branches into a vector.
93 llvm::SmallVector<const Stmt *, 4> Branches;
94 const IfStmt *Cur = IS;
95 while (true) {
96 // Store the `then` branch.
97 Branches.push_back(Cur->getThen());
98
99 Else = Cur->getElse();
100 // The chain ends if there is no `else` branch.
101 if (!Else)
102 break;
103
104 // Check if there is another `else if`...
105 Cur = dyn_cast<IfStmt>(Else);
106 if (!Cur) {
107 // ...this is just a plain `else` branch at the end of the chain.
108 Branches.push_back(Else);
109 break;
110 }
111 }
112
113 size_t N = Branches.size();
114 llvm::BitVector KnownAsClone(N);
115
116 for (size_t i = 0; i + 1 < N; i++) {
117 // We have already seen Branches[i] as a clone of an earlier branch.
118 if (KnownAsClone[i])
119 continue;
120
121 int NumCopies = 1;
122
123 for (size_t j = i + 1; j < N; j++) {
124 if (KnownAsClone[j] ||
125 !areStatementsIdentical(Branches[i]->IgnoreContainers(),
126 Branches[j]->IgnoreContainers(), Context))
127 continue;
128
129 NumCopies++;
130 KnownAsClone[j] = true;
131
132 if (NumCopies == 2) {
Kazuaki Ishizakidd5571d2020-04-05 15:28:11 +0900133 // We report the first occurrence only when we find the second one.
Kristof Umann7f7dd092019-05-15 15:06:25 +0000134 diag(Branches[i]->getBeginLoc(),
135 "repeated branch in conditional chain");
Nathan Huckleberryb53e13c2019-07-17 17:22:43 +0000136 SourceLocation End =
137 Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0,
138 *Result.SourceManager, getLangOpts());
139 if (End.isValid()) {
140 diag(End, "end of the original", DiagnosticIDs::Note);
141 }
Kristof Umann7f7dd092019-05-15 15:06:25 +0000142 }
143
144 diag(Branches[j]->getBeginLoc(), "clone %0 starts here",
145 DiagnosticIDs::Note)
146 << (NumCopies - 1);
147 }
148 }
149 return;
150 }
151
152 if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
153 // We do not try to detect chains of ?: operators.
154 if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context))
155 diag(CO->getQuestionLoc(),
156 "conditional operator with identical true and false expressions");
157
158 return;
159 }
160
161 if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
162 const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
163
164 // Code like
165 // switch (x) case 0: case 1: foobar();
166 // is legal and calls foobar() if and only if x is either 0 or 1;
167 // but we do not try to distinguish branches in such code.
168 if (!Body)
169 return;
170
171 // We will first collect the branches of the switch statements. For the
172 // sake of simplicity we say that branches are delimited by the SwitchCase
173 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
174 // `default:` labels embedded inside other statements and we do not follow
175 // the effects of `break` and other manipulation of the control-flow.
176 llvm::SmallVector<SwitchBranch, 4> Branches;
177 for (const Stmt *S : Body->body()) {
178 // If this is a `case` or `default`, we start a new, empty branch.
179 if (isa<SwitchCase>(S))
180 Branches.emplace_back();
181
182 // There may be code before the first branch (which can be dead code
183 // and can be code reached either through goto or through case labels
184 // that are embedded inside e.g. inner compound statements); we do not
185 // store those statements in branches.
186 if (!Branches.empty())
187 Branches.back().push_back(S);
188 }
189
190 auto End = Branches.end();
191 auto BeginCurrent = Branches.begin();
192 while (BeginCurrent < End) {
193 auto EndCurrent = BeginCurrent + 1;
194 while (EndCurrent < End &&
195 areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
196 ++EndCurrent;
197 }
198 // At this point the iterator range {BeginCurrent, EndCurrent} contains a
199 // complete family of consecutive identical branches.
200 if (EndCurrent > BeginCurrent + 1) {
201 diag(BeginCurrent->front()->getBeginLoc(),
202 "switch has %0 consecutive identical branches")
203 << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
204
205 SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
206 // If the case statement is generated from a macro, it's SourceLocation
Kazuaki Ishizakidd5571d2020-04-05 15:28:11 +0900207 // may be invalid, resulting in an assertion failure down the line.
Kristof Umann7f7dd092019-05-15 15:06:25 +0000208 // While not optimal, try the begin location in this case, it's still
209 // better then nothing.
210 if (EndLoc.isInvalid())
211 EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
212
213 if (EndLoc.isMacroID())
214 EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
Nathan Huckleberryb53e13c2019-07-17 17:22:43 +0000215 EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
216 getLangOpts());
Kristof Umann7f7dd092019-05-15 15:06:25 +0000217
Nathan Huckleberryb53e13c2019-07-17 17:22:43 +0000218 if (EndLoc.isValid()) {
219 diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
220 }
Kristof Umann7f7dd092019-05-15 15:06:25 +0000221 }
222 BeginCurrent = EndCurrent;
223 }
224 return;
225 }
226
227 llvm_unreachable("No if statement and no switch statement.");
228}
229
230} // namespace bugprone
231} // namespace tidy
232} // namespace clang