Jonas Toth | 0ea5af7 | 2018-10-31 16:50:44 +0000 | [diff] [blame] | 1 | //===--- IsolateDeclarationCheck.cpp - clang-tidy -------------------------===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Jonas Toth | 0ea5af7 | 2018-10-31 16:50:44 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "IsolateDeclarationCheck.h" |
| 10 | #include "../utils/LexerUtils.h" |
| 11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 12 | |
| 13 | using namespace clang::ast_matchers; |
| 14 | using namespace clang::tidy::utils::lexer; |
| 15 | |
| 16 | namespace clang { |
| 17 | namespace tidy { |
| 18 | namespace readability { |
| 19 | |
| 20 | namespace { |
| 21 | AST_MATCHER(DeclStmt, isSingleDecl) { return Node.isSingleDecl(); } |
| 22 | AST_MATCHER(DeclStmt, onlyDeclaresVariables) { |
| 23 | return llvm::all_of(Node.decls(), [](Decl *D) { return isa<VarDecl>(D); }); |
| 24 | } |
| 25 | } // namespace |
| 26 | |
| 27 | void IsolateDeclarationCheck::registerMatchers(MatchFinder *Finder) { |
Alexander Kornienko | 976e0c0 | 2018-11-25 02:41:01 +0000 | [diff] [blame] | 28 | Finder->addMatcher(declStmt(onlyDeclaresVariables(), unless(isSingleDecl()), |
| 29 | hasParent(compoundStmt())) |
| 30 | .bind("decl_stmt"), |
| 31 | this); |
Jonas Toth | 0ea5af7 | 2018-10-31 16:50:44 +0000 | [diff] [blame] | 32 | } |
| 33 | |
| 34 | static SourceLocation findStartOfIndirection(SourceLocation Start, |
| 35 | int Indirections, |
| 36 | const SourceManager &SM, |
| 37 | const LangOptions &LangOpts) { |
| 38 | assert(Indirections >= 0 && "Indirections must be non-negative"); |
| 39 | if (Indirections == 0) |
| 40 | return Start; |
| 41 | |
| 42 | // Note that the post-fix decrement is necessary to perform the correct |
| 43 | // number of transformations. |
| 44 | while (Indirections-- != 0) { |
| 45 | Start = findPreviousAnyTokenKind(Start, SM, LangOpts, tok::star, tok::amp); |
| 46 | if (Start.isInvalid() || Start.isMacroID()) |
| 47 | return SourceLocation(); |
| 48 | } |
| 49 | return Start; |
| 50 | } |
| 51 | |
| 52 | static bool isMacroID(SourceRange R) { |
| 53 | return R.getBegin().isMacroID() || R.getEnd().isMacroID(); |
| 54 | } |
| 55 | |
| 56 | /// This function counts the number of written indirections for the given |
| 57 | /// Type \p T. It does \b NOT resolve typedefs as it's a helper for lexing |
| 58 | /// the source code. |
| 59 | /// \see declRanges |
| 60 | static int countIndirections(const Type *T, int Indirections = 0) { |
| 61 | if (T->isFunctionPointerType()) { |
| 62 | const auto *Pointee = T->getPointeeType()->castAs<FunctionType>(); |
| 63 | return countIndirections( |
| 64 | Pointee->getReturnType().IgnoreParens().getTypePtr(), ++Indirections); |
| 65 | } |
| 66 | |
| 67 | // Note: Do not increment the 'Indirections' because it is not yet clear |
| 68 | // if there is an indirection added in the source code of the array |
| 69 | // declaration. |
| 70 | if (const auto *AT = dyn_cast<ArrayType>(T)) |
| 71 | return countIndirections(AT->getElementType().IgnoreParens().getTypePtr(), |
| 72 | Indirections); |
| 73 | |
| 74 | if (isa<PointerType>(T) || isa<ReferenceType>(T)) |
| 75 | return countIndirections(T->getPointeeType().IgnoreParens().getTypePtr(), |
| 76 | ++Indirections); |
| 77 | |
| 78 | return Indirections; |
| 79 | } |
| 80 | |
| 81 | static bool typeIsMemberPointer(const Type *T) { |
| 82 | if (isa<ArrayType>(T)) |
| 83 | return typeIsMemberPointer(T->getArrayElementTypeNoTypeQual()); |
| 84 | |
| 85 | if ((isa<PointerType>(T) || isa<ReferenceType>(T)) && |
| 86 | isa<PointerType>(T->getPointeeType())) |
| 87 | return typeIsMemberPointer(T->getPointeeType().getTypePtr()); |
| 88 | |
| 89 | return isa<MemberPointerType>(T); |
| 90 | } |
| 91 | |
| 92 | /// This function tries to extract the SourceRanges that make up all |
| 93 | /// declarations in this \c DeclStmt. |
| 94 | /// |
| 95 | /// The resulting vector has the structure {UnderlyingType, Decl1, Decl2, ...}. |
| 96 | /// Each \c SourceRange is of the form [Begin, End). |
| 97 | /// If any of the create ranges is invalid or in a macro the result will be |
| 98 | /// \c None. |
| 99 | /// If the \c DeclStmt contains only one declaration, the result is \c None. |
| 100 | /// If the \c DeclStmt contains declarations other than \c VarDecl the result |
| 101 | /// is \c None. |
| 102 | /// |
| 103 | /// \code |
| 104 | /// int * ptr1 = nullptr, value = 42; |
| 105 | /// // [ ][ ] [ ] - The ranges here are inclusive |
| 106 | /// \endcode |
| 107 | /// \todo Generalize this function to take other declarations than \c VarDecl. |
| 108 | static Optional<std::vector<SourceRange>> |
| 109 | declRanges(const DeclStmt *DS, const SourceManager &SM, |
| 110 | const LangOptions &LangOpts) { |
| 111 | std::size_t DeclCount = std::distance(DS->decl_begin(), DS->decl_end()); |
| 112 | if (DeclCount < 2) |
| 113 | return None; |
| 114 | |
| 115 | if (rangeContainsExpansionsOrDirectives(DS->getSourceRange(), SM, LangOpts)) |
| 116 | return None; |
| 117 | |
| 118 | // The initial type of the declaration and each declaration has it's own |
| 119 | // slice. This is necessary, because pointers and references bind only |
| 120 | // to the local variable and not to all variables in the declaration. |
| 121 | // Example: 'int *pointer, value = 42;' |
| 122 | std::vector<SourceRange> Slices; |
| 123 | Slices.reserve(DeclCount + 1); |
| 124 | |
| 125 | // Calculate the first slice, for now only variables are handled but in the |
| 126 | // future this should be relaxed and support various kinds of declarations. |
| 127 | const auto *FirstDecl = dyn_cast<VarDecl>(*DS->decl_begin()); |
| 128 | |
| 129 | if (FirstDecl == nullptr) |
| 130 | return None; |
| 131 | |
| 132 | // FIXME: Member pointers are not transformed correctly right now, that's |
| 133 | // why they are treated as problematic here. |
| 134 | if (typeIsMemberPointer(FirstDecl->getType().IgnoreParens().getTypePtr())) |
| 135 | return None; |
| 136 | |
| 137 | // Consider the following case: 'int * pointer, value = 42;' |
| 138 | // Created slices (inclusive) [ ][ ] [ ] |
| 139 | // Because 'getBeginLoc' points to the start of the variable *name*, the |
| 140 | // location of the pointer must be determined separatly. |
| 141 | SourceLocation Start = findStartOfIndirection( |
| 142 | FirstDecl->getLocation(), |
| 143 | countIndirections(FirstDecl->getType().IgnoreParens().getTypePtr()), SM, |
| 144 | LangOpts); |
| 145 | |
| 146 | // Fix function-pointer declarations that have a '(' in front of the |
| 147 | // pointer. |
| 148 | // Example: 'void (*f2)(int), (*g2)(int, float) = gg;' |
| 149 | // Slices: [ ][ ] [ ] |
| 150 | if (FirstDecl->getType()->isFunctionPointerType()) |
| 151 | Start = findPreviousTokenKind(Start, SM, LangOpts, tok::l_paren); |
| 152 | |
| 153 | // It is popssible that a declarator is wrapped with parens. |
| 154 | // Example: 'float (((*f_ptr2)))[42], *f_ptr3, ((f_value2)) = 42.f;' |
| 155 | // The slice for the type-part must not contain these parens. Consequently |
| 156 | // 'Start' is moved to the most left paren if there are parens. |
| 157 | while (true) { |
| 158 | if (Start.isInvalid() || Start.isMacroID()) |
| 159 | break; |
| 160 | |
| 161 | Token T = getPreviousToken(Start, SM, LangOpts); |
| 162 | if (T.is(tok::l_paren)) { |
| 163 | Start = findPreviousTokenStart(Start, SM, LangOpts); |
| 164 | continue; |
| 165 | } |
| 166 | break; |
| 167 | } |
| 168 | |
| 169 | SourceRange DeclRange(DS->getBeginLoc(), Start); |
| 170 | if (DeclRange.isInvalid() || isMacroID(DeclRange)) |
| 171 | return None; |
| 172 | |
| 173 | // The first slice, that is prepended to every isolated declaration, is |
| 174 | // created. |
| 175 | Slices.emplace_back(DeclRange); |
| 176 | |
| 177 | // Create all following slices that each declare a variable. |
| 178 | SourceLocation DeclBegin = Start; |
| 179 | for (const auto &Decl : DS->decls()) { |
| 180 | const auto *CurrentDecl = cast<VarDecl>(Decl); |
| 181 | |
| 182 | // FIXME: Member pointers are not transformed correctly right now, that's |
| 183 | // why they are treated as problematic here. |
| 184 | if (typeIsMemberPointer(CurrentDecl->getType().IgnoreParens().getTypePtr())) |
| 185 | return None; |
| 186 | |
| 187 | SourceLocation DeclEnd = |
| 188 | CurrentDecl->hasInit() |
| 189 | ? findNextTerminator(CurrentDecl->getInit()->getEndLoc(), SM, |
| 190 | LangOpts) |
| 191 | : findNextTerminator(CurrentDecl->getEndLoc(), SM, LangOpts); |
| 192 | |
| 193 | SourceRange VarNameRange(DeclBegin, DeclEnd); |
| 194 | if (VarNameRange.isInvalid() || isMacroID(VarNameRange)) |
| 195 | return None; |
| 196 | |
| 197 | Slices.emplace_back(VarNameRange); |
| 198 | DeclBegin = DeclEnd.getLocWithOffset(1); |
| 199 | } |
| 200 | return Slices; |
| 201 | } |
| 202 | |
| 203 | static Optional<std::vector<StringRef>> |
| 204 | collectSourceRanges(llvm::ArrayRef<SourceRange> Ranges, const SourceManager &SM, |
| 205 | const LangOptions &LangOpts) { |
| 206 | std::vector<StringRef> Snippets; |
| 207 | Snippets.reserve(Ranges.size()); |
| 208 | |
| 209 | for (const auto &Range : Ranges) { |
| 210 | CharSourceRange CharRange = Lexer::getAsCharRange( |
| 211 | CharSourceRange::getCharRange(Range.getBegin(), Range.getEnd()), SM, |
| 212 | LangOpts); |
| 213 | |
| 214 | if (CharRange.isInvalid()) |
| 215 | return None; |
| 216 | |
| 217 | bool InvalidText = false; |
| 218 | StringRef Snippet = |
| 219 | Lexer::getSourceText(CharRange, SM, LangOpts, &InvalidText); |
| 220 | |
| 221 | if (InvalidText) |
| 222 | return None; |
| 223 | |
| 224 | Snippets.emplace_back(Snippet); |
| 225 | } |
| 226 | |
| 227 | return Snippets; |
| 228 | } |
| 229 | |
| 230 | /// Expects a vector {TypeSnippet, Firstdecl, SecondDecl, ...}. |
| 231 | static std::vector<std::string> |
| 232 | createIsolatedDecls(llvm::ArrayRef<StringRef> Snippets) { |
| 233 | // The first section is the type snippet, which does not make a decl itself. |
| 234 | assert(Snippets.size() > 2 && "Not enough snippets to create isolated decls"); |
| 235 | std::vector<std::string> Decls(Snippets.size() - 1); |
| 236 | |
| 237 | for (std::size_t I = 1; I < Snippets.size(); ++I) |
| 238 | Decls[I - 1] = Twine(Snippets[0]) |
| 239 | .concat(Snippets[0].endswith(" ") ? "" : " ") |
| 240 | .concat(Snippets[I].ltrim()) |
| 241 | .concat(";") |
| 242 | .str(); |
| 243 | |
| 244 | return Decls; |
| 245 | } |
| 246 | |
| 247 | void IsolateDeclarationCheck::check(const MatchFinder::MatchResult &Result) { |
| 248 | const auto *WholeDecl = Result.Nodes.getNodeAs<DeclStmt>("decl_stmt"); |
| 249 | |
| 250 | auto Diag = |
| 251 | diag(WholeDecl->getBeginLoc(), |
| 252 | "multiple declarations in a single statement reduces readability"); |
| 253 | |
| 254 | Optional<std::vector<SourceRange>> PotentialRanges = |
| 255 | declRanges(WholeDecl, *Result.SourceManager, getLangOpts()); |
| 256 | if (!PotentialRanges) |
| 257 | return; |
| 258 | |
| 259 | Optional<std::vector<StringRef>> PotentialSnippets = collectSourceRanges( |
| 260 | *PotentialRanges, *Result.SourceManager, getLangOpts()); |
| 261 | |
| 262 | if (!PotentialSnippets) |
| 263 | return; |
| 264 | |
| 265 | std::vector<std::string> NewDecls = createIsolatedDecls(*PotentialSnippets); |
| 266 | std::string Replacement = llvm::join( |
| 267 | NewDecls, |
| 268 | (Twine("\n") + Lexer::getIndentationForLine(WholeDecl->getBeginLoc(), |
| 269 | *Result.SourceManager)) |
| 270 | .str()); |
| 271 | |
| 272 | Diag << FixItHint::CreateReplacement(WholeDecl->getSourceRange(), |
| 273 | Replacement); |
| 274 | } |
| 275 | } // namespace readability |
| 276 | } // namespace tidy |
| 277 | } // namespace clang |