Kristof Umann | 7f7dd09 | 2019-05-15 15:06:25 +0000 | [diff] [blame] | 1 | //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "BranchCloneCheck.h" |
| 11 | #include "clang/AST/ASTContext.h" |
| 12 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 13 | #include "clang/Analysis/CloneDetection.h" |
| 14 | #include "llvm/Support/Casting.h" |
| 15 | |
| 16 | using namespace clang; |
| 17 | using namespace clang::ast_matchers; |
| 18 | |
| 19 | /// Returns true when the statements are Type I clones of each other. |
| 20 | static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS, |
| 21 | const ASTContext &Context) { |
| 22 | llvm::FoldingSetNodeID DataLHS, DataRHS; |
| 23 | LHS->Profile(DataLHS, Context, false); |
| 24 | RHS->Profile(DataRHS, Context, false); |
| 25 | return (DataLHS == DataRHS); |
| 26 | } |
| 27 | |
| 28 | namespace { |
| 29 | /// A branch in a switch may consist of several statements; while a branch in |
| 30 | /// an if/else if/else chain is one statement (which may be a CompoundStmt). |
| 31 | using SwitchBranch = llvm::SmallVector<const Stmt *, 2>; |
| 32 | } // anonymous namespace |
| 33 | |
| 34 | /// Determines if the bodies of two branches in a switch statements are Type I |
| 35 | /// clones of each other. This function only examines the body of the branch |
| 36 | /// and ignores the `case X:` or `default:` at the start of the branch. |
| 37 | static bool areSwitchBranchesIdentical(const SwitchBranch LHS, |
| 38 | const SwitchBranch RHS, |
| 39 | const ASTContext &Context) { |
| 40 | if (LHS.size() != RHS.size()) |
| 41 | return false; |
| 42 | |
| 43 | for (size_t i = 0, Size = LHS.size(); i < Size; i++) { |
| 44 | // NOTE: We strip goto labels and annotations in addition to stripping |
| 45 | // the `case X:` or `default:` labels, but it is very unlikely that this |
| 46 | // would casue false positives in real-world code. |
| 47 | if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(), |
| 48 | RHS[i]->stripLabelLikeStatements(), Context)) { |
| 49 | return false; |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | return true; |
| 54 | } |
| 55 | |
| 56 | namespace clang { |
| 57 | namespace tidy { |
| 58 | namespace bugprone { |
| 59 | |
| 60 | void BranchCloneCheck::registerMatchers(MatchFinder *Finder) { |
| 61 | Finder->addMatcher( |
| 62 | ifStmt(stmt().bind("if"), |
| 63 | hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))), |
| 64 | hasElse(stmt().bind("else"))), |
| 65 | this); |
| 66 | Finder->addMatcher(switchStmt().bind("switch"), this); |
| 67 | Finder->addMatcher(conditionalOperator().bind("condOp"), this); |
| 68 | } |
| 69 | |
| 70 | void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) { |
| 71 | const ASTContext &Context = *Result.Context; |
| 72 | |
| 73 | if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) { |
| 74 | const Stmt *Then = IS->getThen(); |
| 75 | assert(Then && "An IfStmt must have a `then` branch!"); |
| 76 | |
| 77 | const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else"); |
| 78 | assert(Else && "We only look for `if` statements with an `else` branch!"); |
| 79 | |
| 80 | if (!isa<IfStmt>(Else)) { |
| 81 | // Just a simple if with no `else if` branch. |
| 82 | if (areStatementsIdentical(Then->IgnoreContainers(), |
| 83 | Else->IgnoreContainers(), Context)) { |
| 84 | diag(IS->getBeginLoc(), "if with identical then and else branches"); |
| 85 | diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note); |
| 86 | } |
| 87 | return; |
| 88 | } |
| 89 | |
| 90 | // This is the complicated case when we start an if/else if/else chain. |
| 91 | // To find all the duplicates, we collect all the branches into a vector. |
| 92 | llvm::SmallVector<const Stmt *, 4> Branches; |
| 93 | const IfStmt *Cur = IS; |
| 94 | while (true) { |
| 95 | // Store the `then` branch. |
| 96 | Branches.push_back(Cur->getThen()); |
| 97 | |
| 98 | Else = Cur->getElse(); |
| 99 | // The chain ends if there is no `else` branch. |
| 100 | if (!Else) |
| 101 | break; |
| 102 | |
| 103 | // Check if there is another `else if`... |
| 104 | Cur = dyn_cast<IfStmt>(Else); |
| 105 | if (!Cur) { |
| 106 | // ...this is just a plain `else` branch at the end of the chain. |
| 107 | Branches.push_back(Else); |
| 108 | break; |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | size_t N = Branches.size(); |
| 113 | llvm::BitVector KnownAsClone(N); |
| 114 | |
| 115 | for (size_t i = 0; i + 1 < N; i++) { |
| 116 | // We have already seen Branches[i] as a clone of an earlier branch. |
| 117 | if (KnownAsClone[i]) |
| 118 | continue; |
| 119 | |
| 120 | int NumCopies = 1; |
| 121 | |
| 122 | for (size_t j = i + 1; j < N; j++) { |
| 123 | if (KnownAsClone[j] || |
| 124 | !areStatementsIdentical(Branches[i]->IgnoreContainers(), |
| 125 | Branches[j]->IgnoreContainers(), Context)) |
| 126 | continue; |
| 127 | |
| 128 | NumCopies++; |
| 129 | KnownAsClone[j] = true; |
| 130 | |
| 131 | if (NumCopies == 2) { |
| 132 | // We report the first occurence only when we find the second one. |
| 133 | diag(Branches[i]->getBeginLoc(), |
| 134 | "repeated branch in conditional chain"); |
| 135 | diag(Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0, |
| 136 | *Result.SourceManager, getLangOpts()), |
| 137 | "end of the original", DiagnosticIDs::Note); |
| 138 | } |
| 139 | |
| 140 | diag(Branches[j]->getBeginLoc(), "clone %0 starts here", |
| 141 | DiagnosticIDs::Note) |
| 142 | << (NumCopies - 1); |
| 143 | } |
| 144 | } |
| 145 | return; |
| 146 | } |
| 147 | |
| 148 | if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) { |
| 149 | // We do not try to detect chains of ?: operators. |
| 150 | if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context)) |
| 151 | diag(CO->getQuestionLoc(), |
| 152 | "conditional operator with identical true and false expressions"); |
| 153 | |
| 154 | return; |
| 155 | } |
| 156 | |
| 157 | if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) { |
| 158 | const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody()); |
| 159 | |
| 160 | // Code like |
| 161 | // switch (x) case 0: case 1: foobar(); |
| 162 | // is legal and calls foobar() if and only if x is either 0 or 1; |
| 163 | // but we do not try to distinguish branches in such code. |
| 164 | if (!Body) |
| 165 | return; |
| 166 | |
| 167 | // We will first collect the branches of the switch statements. For the |
| 168 | // sake of simplicity we say that branches are delimited by the SwitchCase |
| 169 | // (`case:` or `default:`) children of Body; that is, we ignore `case:` or |
| 170 | // `default:` labels embedded inside other statements and we do not follow |
| 171 | // the effects of `break` and other manipulation of the control-flow. |
| 172 | llvm::SmallVector<SwitchBranch, 4> Branches; |
| 173 | for (const Stmt *S : Body->body()) { |
| 174 | // If this is a `case` or `default`, we start a new, empty branch. |
| 175 | if (isa<SwitchCase>(S)) |
| 176 | Branches.emplace_back(); |
| 177 | |
| 178 | // There may be code before the first branch (which can be dead code |
| 179 | // and can be code reached either through goto or through case labels |
| 180 | // that are embedded inside e.g. inner compound statements); we do not |
| 181 | // store those statements in branches. |
| 182 | if (!Branches.empty()) |
| 183 | Branches.back().push_back(S); |
| 184 | } |
| 185 | |
| 186 | auto End = Branches.end(); |
| 187 | auto BeginCurrent = Branches.begin(); |
| 188 | while (BeginCurrent < End) { |
| 189 | auto EndCurrent = BeginCurrent + 1; |
| 190 | while (EndCurrent < End && |
| 191 | areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) { |
| 192 | ++EndCurrent; |
| 193 | } |
| 194 | // At this point the iterator range {BeginCurrent, EndCurrent} contains a |
| 195 | // complete family of consecutive identical branches. |
| 196 | if (EndCurrent > BeginCurrent + 1) { |
| 197 | diag(BeginCurrent->front()->getBeginLoc(), |
| 198 | "switch has %0 consecutive identical branches") |
| 199 | << static_cast<int>(std::distance(BeginCurrent, EndCurrent)); |
| 200 | |
| 201 | SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc(); |
| 202 | // If the case statement is generated from a macro, it's SourceLocation |
| 203 | // may be invalid, resuling in an assertation failure down the line. |
| 204 | // While not optimal, try the begin location in this case, it's still |
| 205 | // better then nothing. |
| 206 | if (EndLoc.isInvalid()) |
| 207 | EndLoc = (EndCurrent - 1)->back()->getBeginLoc(); |
| 208 | |
| 209 | if (EndLoc.isMacroID()) |
| 210 | EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc); |
| 211 | |
| 212 | diag(Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager, |
| 213 | getLangOpts()), |
| 214 | "last of these clones ends here", DiagnosticIDs::Note); |
| 215 | } |
| 216 | BeginCurrent = EndCurrent; |
| 217 | } |
| 218 | return; |
| 219 | } |
| 220 | |
| 221 | llvm_unreachable("No if statement and no switch statement."); |
| 222 | } |
| 223 | |
| 224 | } // namespace bugprone |
| 225 | } // namespace tidy |
| 226 | } // namespace clang |