Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 1 | //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "BranchCloneCheck.h" |
| 11 | #include "clang/AST/ASTContext.h" |
| 12 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 13 | #include "clang/Analysis/CloneDetection.h" |
Nathan James | 860aefd | 2020-06-29 16:05:51 +0100 | [diff] [blame] | 14 | #include "clang/Lex/Lexer.h" |
Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 15 | #include "llvm/Support/Casting.h" |
| 16 | |
| 17 | using namespace clang; |
| 18 | using namespace clang::ast_matchers; |
| 19 | |
| 20 | /// Returns true when the statements are Type I clones of each other. |
| 21 | static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS, |
| 22 | const ASTContext &Context) { |
| 23 | llvm::FoldingSetNodeID DataLHS, DataRHS; |
| 24 | LHS->Profile(DataLHS, Context, false); |
| 25 | RHS->Profile(DataRHS, Context, false); |
| 26 | return (DataLHS == DataRHS); |
| 27 | } |
| 28 | |
| 29 | namespace { |
| 30 | /// A branch in a switch may consist of several statements; while a branch in |
| 31 | /// an if/else if/else chain is one statement (which may be a CompoundStmt). |
| 32 | using SwitchBranch = llvm::SmallVector<const Stmt *, 2>; |
| 33 | } // anonymous namespace |
| 34 | |
| 35 | /// Determines if the bodies of two branches in a switch statements are Type I |
| 36 | /// clones of each other. This function only examines the body of the branch |
| 37 | /// and ignores the `case X:` or `default:` at the start of the branch. |
| 38 | static bool areSwitchBranchesIdentical(const SwitchBranch LHS, |
| 39 | const SwitchBranch RHS, |
| 40 | const ASTContext &Context) { |
| 41 | if (LHS.size() != RHS.size()) |
| 42 | return false; |
| 43 | |
| 44 | for (size_t i = 0, Size = LHS.size(); i < Size; i++) { |
| 45 | // NOTE: We strip goto labels and annotations in addition to stripping |
| 46 | // the `case X:` or `default:` labels, but it is very unlikely that this |
| 47 | // would casue false positives in real-world code. |
| 48 | if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(), |
| 49 | RHS[i]->stripLabelLikeStatements(), Context)) { |
| 50 | return false; |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | return true; |
| 55 | } |
| 56 | |
| 57 | namespace clang { |
| 58 | namespace tidy { |
| 59 | namespace bugprone { |
| 60 | |
| 61 | void BranchCloneCheck::registerMatchers(MatchFinder *Finder) { |
| 62 | Finder->addMatcher( |
Nathan | f9c4622 | 2019-12-31 09:57:16 +0000 | [diff] [blame] | 63 | ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())), |
| 64 | stmt().bind("if"), |
Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 65 | hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))), |
| 66 | hasElse(stmt().bind("else"))), |
| 67 | this); |
| 68 | Finder->addMatcher(switchStmt().bind("switch"), this); |
| 69 | Finder->addMatcher(conditionalOperator().bind("condOp"), this); |
| 70 | } |
| 71 | |
| 72 | void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) { |
| 73 | const ASTContext &Context = *Result.Context; |
| 74 | |
| 75 | if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) { |
| 76 | const Stmt *Then = IS->getThen(); |
| 77 | assert(Then && "An IfStmt must have a `then` branch!"); |
| 78 | |
| 79 | const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else"); |
| 80 | assert(Else && "We only look for `if` statements with an `else` branch!"); |
| 81 | |
| 82 | if (!isa<IfStmt>(Else)) { |
| 83 | // Just a simple if with no `else if` branch. |
| 84 | if (areStatementsIdentical(Then->IgnoreContainers(), |
| 85 | Else->IgnoreContainers(), Context)) { |
| 86 | diag(IS->getBeginLoc(), "if with identical then and else branches"); |
| 87 | diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note); |
| 88 | } |
| 89 | return; |
| 90 | } |
| 91 | |
| 92 | // This is the complicated case when we start an if/else if/else chain. |
| 93 | // To find all the duplicates, we collect all the branches into a vector. |
| 94 | llvm::SmallVector<const Stmt *, 4> Branches; |
| 95 | const IfStmt *Cur = IS; |
| 96 | while (true) { |
| 97 | // Store the `then` branch. |
| 98 | Branches.push_back(Cur->getThen()); |
| 99 | |
| 100 | Else = Cur->getElse(); |
| 101 | // The chain ends if there is no `else` branch. |
| 102 | if (!Else) |
| 103 | break; |
| 104 | |
| 105 | // Check if there is another `else if`... |
| 106 | Cur = dyn_cast<IfStmt>(Else); |
| 107 | if (!Cur) { |
| 108 | // ...this is just a plain `else` branch at the end of the chain. |
| 109 | Branches.push_back(Else); |
| 110 | break; |
| 111 | } |
| 112 | } |
| 113 | |
| 114 | size_t N = Branches.size(); |
| 115 | llvm::BitVector KnownAsClone(N); |
| 116 | |
| 117 | for (size_t i = 0; i + 1 < N; i++) { |
| 118 | // We have already seen Branches[i] as a clone of an earlier branch. |
| 119 | if (KnownAsClone[i]) |
| 120 | continue; |
| 121 | |
| 122 | int NumCopies = 1; |
| 123 | |
| 124 | for (size_t j = i + 1; j < N; j++) { |
| 125 | if (KnownAsClone[j] || |
| 126 | !areStatementsIdentical(Branches[i]->IgnoreContainers(), |
| 127 | Branches[j]->IgnoreContainers(), Context)) |
| 128 | continue; |
| 129 | |
| 130 | NumCopies++; |
| 131 | KnownAsClone[j] = true; |
| 132 | |
| 133 | if (NumCopies == 2) { |
Kazuaki Ishizaki | dd5571d | 2020-04-05 15:28:11 +0900 | [diff] [blame] | 134 | // We report the first occurrence only when we find the second one. |
Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 135 | diag(Branches[i]->getBeginLoc(), |
| 136 | "repeated branch in conditional chain"); |
Nathan Huckleberry | b53e13c | 2019-07-17 17:22:43 +0000 | [diff] [blame] | 137 | SourceLocation End = |
| 138 | Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0, |
| 139 | *Result.SourceManager, getLangOpts()); |
| 140 | if (End.isValid()) { |
| 141 | diag(End, "end of the original", DiagnosticIDs::Note); |
| 142 | } |
Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 143 | } |
| 144 | |
| 145 | diag(Branches[j]->getBeginLoc(), "clone %0 starts here", |
| 146 | DiagnosticIDs::Note) |
| 147 | << (NumCopies - 1); |
| 148 | } |
| 149 | } |
| 150 | return; |
| 151 | } |
| 152 | |
| 153 | if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) { |
| 154 | // We do not try to detect chains of ?: operators. |
| 155 | if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context)) |
| 156 | diag(CO->getQuestionLoc(), |
| 157 | "conditional operator with identical true and false expressions"); |
| 158 | |
| 159 | return; |
| 160 | } |
| 161 | |
| 162 | if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) { |
| 163 | const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody()); |
| 164 | |
| 165 | // Code like |
| 166 | // switch (x) case 0: case 1: foobar(); |
| 167 | // is legal and calls foobar() if and only if x is either 0 or 1; |
| 168 | // but we do not try to distinguish branches in such code. |
| 169 | if (!Body) |
| 170 | return; |
| 171 | |
| 172 | // We will first collect the branches of the switch statements. For the |
| 173 | // sake of simplicity we say that branches are delimited by the SwitchCase |
| 174 | // (`case:` or `default:`) children of Body; that is, we ignore `case:` or |
| 175 | // `default:` labels embedded inside other statements and we do not follow |
| 176 | // the effects of `break` and other manipulation of the control-flow. |
| 177 | llvm::SmallVector<SwitchBranch, 4> Branches; |
| 178 | for (const Stmt *S : Body->body()) { |
| 179 | // If this is a `case` or `default`, we start a new, empty branch. |
| 180 | if (isa<SwitchCase>(S)) |
| 181 | Branches.emplace_back(); |
| 182 | |
| 183 | // There may be code before the first branch (which can be dead code |
| 184 | // and can be code reached either through goto or through case labels |
| 185 | // that are embedded inside e.g. inner compound statements); we do not |
| 186 | // store those statements in branches. |
| 187 | if (!Branches.empty()) |
| 188 | Branches.back().push_back(S); |
| 189 | } |
| 190 | |
| 191 | auto End = Branches.end(); |
| 192 | auto BeginCurrent = Branches.begin(); |
| 193 | while (BeginCurrent < End) { |
| 194 | auto EndCurrent = BeginCurrent + 1; |
| 195 | while (EndCurrent < End && |
| 196 | areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) { |
| 197 | ++EndCurrent; |
| 198 | } |
| 199 | // At this point the iterator range {BeginCurrent, EndCurrent} contains a |
| 200 | // complete family of consecutive identical branches. |
| 201 | if (EndCurrent > BeginCurrent + 1) { |
| 202 | diag(BeginCurrent->front()->getBeginLoc(), |
| 203 | "switch has %0 consecutive identical branches") |
| 204 | << static_cast<int>(std::distance(BeginCurrent, EndCurrent)); |
| 205 | |
| 206 | SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc(); |
| 207 | // If the case statement is generated from a macro, it's SourceLocation |
Kazuaki Ishizaki | dd5571d | 2020-04-05 15:28:11 +0900 | [diff] [blame] | 208 | // may be invalid, resulting in an assertion failure down the line. |
Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 209 | // While not optimal, try the begin location in this case, it's still |
| 210 | // better then nothing. |
| 211 | if (EndLoc.isInvalid()) |
| 212 | EndLoc = (EndCurrent - 1)->back()->getBeginLoc(); |
| 213 | |
| 214 | if (EndLoc.isMacroID()) |
| 215 | EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc); |
Nathan Huckleberry | b53e13c | 2019-07-17 17:22:43 +0000 | [diff] [blame] | 216 | EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager, |
| 217 | getLangOpts()); |
Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 218 | |
Nathan Huckleberry | b53e13c | 2019-07-17 17:22:43 +0000 | [diff] [blame] | 219 | if (EndLoc.isValid()) { |
| 220 | diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note); |
| 221 | } |
Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 222 | } |
| 223 | BeginCurrent = EndCurrent; |
| 224 | } |
| 225 | return; |
| 226 | } |
| 227 | |
| 228 | llvm_unreachable("No if statement and no switch statement."); |
| 229 | } |
| 230 | |
| 231 | } // namespace bugprone |
| 232 | } // namespace tidy |
| 233 | } // namespace clang |