Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 1 | //===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// CloneChecker is a checker that reports clones in the current translation |
| 12 | /// unit. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "ClangSACheckers.h" |
| 17 | #include "clang/Analysis/CloneDetection.h" |
| 18 | #include "clang/Basic/Diagnostic.h" |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 19 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 20 | #include "clang/StaticAnalyzer/Core/Checker.h" |
| 21 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 22 | #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 23 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
| 24 | |
| 25 | using namespace clang; |
| 26 | using namespace ento; |
| 27 | |
| 28 | namespace { |
| 29 | class CloneChecker |
| 30 | : public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> { |
Artem Dergachev | 96034ca | 2016-07-26 19:05:22 +0000 | [diff] [blame] | 31 | mutable CloneDetector Detector; |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 32 | mutable std::unique_ptr<BugType> BT_Exact, BT_Suspicious; |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 33 | |
| 34 | public: |
| 35 | void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, |
| 36 | BugReporter &BR) const; |
| 37 | |
| 38 | void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, |
| 39 | AnalysisManager &Mgr, BugReporter &BR) const; |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 40 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 41 | /// Reports all clones to the user. |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 42 | void reportClones(BugReporter &BR, AnalysisManager &Mgr, |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 43 | std::vector<CloneDetector::CloneGroup> &CloneGroups) const; |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 44 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 45 | /// Reports only suspicious clones to the user along with informaton |
| 46 | /// that explain why they are suspicious. |
| 47 | void reportSuspiciousClones( |
| 48 | BugReporter &BR, AnalysisManager &Mgr, |
| 49 | std::vector<CloneDetector::CloneGroup> &CloneGroups) const; |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 50 | }; |
| 51 | } // end anonymous namespace |
| 52 | |
| 53 | void CloneChecker::checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, |
| 54 | BugReporter &BR) const { |
| 55 | // Every statement that should be included in the search for clones needs to |
| 56 | // be passed to the CloneDetector. |
Artem Dergachev | 96034ca | 2016-07-26 19:05:22 +0000 | [diff] [blame] | 57 | Detector.analyzeCodeBody(D); |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 58 | } |
| 59 | |
| 60 | void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, |
| 61 | AnalysisManager &Mgr, |
| 62 | BugReporter &BR) const { |
| 63 | // At this point, every statement in the translation unit has been analyzed by |
| 64 | // the CloneDetector. The only thing left to do is to report the found clones. |
| 65 | |
| 66 | int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger( |
Raphael Isemann | 561f0de | 2017-09-04 05:56:36 +0000 | [diff] [blame] | 67 | "MinimumCloneComplexity", 50, this); |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 68 | assert(MinComplexity >= 0); |
| 69 | |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 70 | bool ReportSuspiciousClones = Mgr.getAnalyzerOptions().getBooleanOption( |
| 71 | "ReportSuspiciousClones", true, this); |
| 72 | |
| 73 | bool ReportNormalClones = Mgr.getAnalyzerOptions().getBooleanOption( |
| 74 | "ReportNormalClones", true, this); |
| 75 | |
Leslie Zhai | d91d19e | 2017-06-19 01:55:50 +0000 | [diff] [blame] | 76 | StringRef IgnoredFilesPattern = Mgr.getAnalyzerOptions().getOptionAsString( |
| 77 | "IgnoredFilesPattern", "", this); |
| 78 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 79 | // Let the CloneDetector create a list of clones from all the analyzed |
| 80 | // statements. We don't filter for matching variable patterns at this point |
| 81 | // because reportSuspiciousClones() wants to search them for errors. |
| 82 | std::vector<CloneDetector::CloneGroup> AllCloneGroups; |
Artem Dergachev | f8b4fc3 | 2017-04-05 14:17:36 +0000 | [diff] [blame] | 83 | |
Raphael Isemann | 70686a1 | 2017-08-31 07:10:46 +0000 | [diff] [blame] | 84 | Detector.findClones( |
| 85 | AllCloneGroups, FilenamePatternConstraint(IgnoredFilesPattern), |
| 86 | RecursiveCloneTypeIIHashConstraint(), MinGroupSizeConstraint(2), |
| 87 | MinComplexityConstraint(MinComplexity), |
| 88 | RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint()); |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 89 | |
| 90 | if (ReportSuspiciousClones) |
| 91 | reportSuspiciousClones(BR, Mgr, AllCloneGroups); |
| 92 | |
| 93 | // We are done for this translation unit unless we also need to report normal |
| 94 | // clones. |
| 95 | if (!ReportNormalClones) |
| 96 | return; |
| 97 | |
| 98 | // Now that the suspicious clone detector has checked for pattern errors, |
| 99 | // we also filter all clones who don't have matching patterns |
| 100 | CloneDetector::constrainClones(AllCloneGroups, |
| 101 | MatchingVariablePatternConstraint(), |
| 102 | MinGroupSizeConstraint(2)); |
| 103 | |
| 104 | reportClones(BR, Mgr, AllCloneGroups); |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 105 | } |
| 106 | |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 107 | static PathDiagnosticLocation makeLocation(const StmtSequence &S, |
| 108 | AnalysisManager &Mgr) { |
| 109 | ASTContext &ACtx = Mgr.getASTContext(); |
| 110 | return PathDiagnosticLocation::createBegin( |
| 111 | S.front(), ACtx.getSourceManager(), |
| 112 | Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl())); |
| 113 | } |
| 114 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 115 | void CloneChecker::reportClones( |
| 116 | BugReporter &BR, AnalysisManager &Mgr, |
| 117 | std::vector<CloneDetector::CloneGroup> &CloneGroups) const { |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 118 | |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 119 | if (!BT_Exact) |
| 120 | BT_Exact.reset(new BugType(this, "Exact code clone", "Code clone")); |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 121 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 122 | for (const CloneDetector::CloneGroup &Group : CloneGroups) { |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 123 | // We group the clones by printing the first as a warning and all others |
| 124 | // as a note. |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 125 | auto R = llvm::make_unique<BugReport>(*BT_Exact, "Duplicate code detected", |
| 126 | makeLocation(Group.front(), Mgr)); |
| 127 | R->addRange(Group.front().getSourceRange()); |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 128 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 129 | for (unsigned i = 1; i < Group.size(); ++i) |
| 130 | R->addNote("Similar code here", makeLocation(Group[i], Mgr), |
| 131 | Group[i].getSourceRange()); |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 132 | BR.emitReport(std::move(R)); |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 133 | } |
| 134 | } |
| 135 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 136 | void CloneChecker::reportSuspiciousClones( |
| 137 | BugReporter &BR, AnalysisManager &Mgr, |
| 138 | std::vector<CloneDetector::CloneGroup> &CloneGroups) const { |
| 139 | std::vector<VariablePattern::SuspiciousClonePair> Pairs; |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 140 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 141 | for (const CloneDetector::CloneGroup &Group : CloneGroups) { |
| 142 | for (unsigned i = 0; i < Group.size(); ++i) { |
| 143 | VariablePattern PatternA(Group[i]); |
| 144 | |
| 145 | for (unsigned j = i + 1; j < Group.size(); ++j) { |
| 146 | VariablePattern PatternB(Group[j]); |
| 147 | |
| 148 | VariablePattern::SuspiciousClonePair ClonePair; |
| 149 | // For now, we only report clones which break the variable pattern just |
| 150 | // once because multiple differences in a pattern are an indicator that |
| 151 | // those differences are maybe intended (e.g. because it's actually a |
| 152 | // different algorithm). |
| 153 | // FIXME: In very big clones even multiple variables can be unintended, |
| 154 | // so replacing this number with a percentage could better handle such |
| 155 | // cases. On the other hand it could increase the false-positive rate |
| 156 | // for all clones if the percentage is too high. |
| 157 | if (PatternA.countPatternDifferences(PatternB, &ClonePair) == 1) { |
| 158 | Pairs.push_back(ClonePair); |
| 159 | break; |
| 160 | } |
| 161 | } |
| 162 | } |
| 163 | } |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 164 | |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 165 | if (!BT_Suspicious) |
| 166 | BT_Suspicious.reset( |
| 167 | new BugType(this, "Suspicious code clone", "Code clone")); |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 168 | |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 169 | ASTContext &ACtx = BR.getContext(); |
| 170 | SourceManager &SM = ACtx.getSourceManager(); |
| 171 | AnalysisDeclContext *ADC = |
| 172 | Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl()); |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 173 | |
Artem Dergachev | da9e718 | 2017-04-06 14:34:07 +0000 | [diff] [blame] | 174 | for (VariablePattern::SuspiciousClonePair &Pair : Pairs) { |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 175 | // FIXME: We are ignoring the suggestions currently, because they are |
| 176 | // only 50% accurate (even if the second suggestion is unavailable), |
| 177 | // which may confuse the user. |
| 178 | // Think how to perform more accurate suggestions? |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 179 | |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 180 | auto R = llvm::make_unique<BugReport>( |
| 181 | *BT_Suspicious, |
| 182 | "Potential copy-paste error; did you really mean to use '" + |
| 183 | Pair.FirstCloneInfo.Variable->getNameAsString() + "' here?", |
| 184 | PathDiagnosticLocation::createBegin(Pair.FirstCloneInfo.Mention, SM, |
| 185 | ADC)); |
| 186 | R->addRange(Pair.FirstCloneInfo.Mention->getSourceRange()); |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 187 | |
Artem Dergachev | 4eca0de | 2016-10-08 10:54:30 +0000 | [diff] [blame] | 188 | R->addNote("Similar code using '" + |
| 189 | Pair.SecondCloneInfo.Variable->getNameAsString() + "' here", |
| 190 | PathDiagnosticLocation::createBegin(Pair.SecondCloneInfo.Mention, |
| 191 | SM, ADC), |
| 192 | Pair.SecondCloneInfo.Mention->getSourceRange()); |
| 193 | |
| 194 | BR.emitReport(std::move(R)); |
Artem Dergachev | 2fc1985 | 2016-08-18 12:29:41 +0000 | [diff] [blame] | 195 | } |
| 196 | } |
| 197 | |
Artem Dergachev | ba81632 | 2016-07-26 18:13:12 +0000 | [diff] [blame] | 198 | //===----------------------------------------------------------------------===// |
| 199 | // Register CloneChecker |
| 200 | //===----------------------------------------------------------------------===// |
| 201 | |
| 202 | void ento::registerCloneChecker(CheckerManager &Mgr) { |
| 203 | Mgr.registerChecker<CloneChecker>(); |
| 204 | } |