blob: dba5e4d44c854fcdc474ee2eea2325be72549992 [file] [log] [blame]
Daniel Jasper32d28ee2013-01-29 21:01:14 +00001//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
17#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
18
19#include "UnwrappedLineParser.h"
20#include "clang/Basic/OperatorPrecedence.h"
21#include "clang/Format/Format.h"
22#include <string>
23
24namespace clang {
25class Lexer;
26class SourceManager;
27
28namespace format {
29
/// \brief The role the annotator assigns to a token.
///
/// Enumerator values are implicit, so the declaration order fixes the numeric
/// value of every entry; \c TT_Unknown is deliberately the last one.
enum TokenType {
  // Operators, casts and colons with a special meaning.
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_DesignatedInitializerPeriod,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_FunctionTypeLParen,
  TT_LineComment,

  // Objective-C specific token roles.
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCDictLiteral,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,

  // Remaining C++ token roles.
  TT_OverloadedOperator,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,

  // No role has been determined for the token.
  TT_Unknown
};
63
/// \brief The kind of line an \c AnnotatedLine represents.
///
/// Enumerator values are implicit and therefore follow declaration order,
/// starting with \c LT_Invalid at 0.
enum LineType {
  LT_Invalid,
  LT_Other,
  LT_BuilderTypeCall,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty
};
74
75class AnnotatedToken {
76public:
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +000077 explicit AnnotatedToken(FormatToken *FormatTok)
Daniel Jasper729a7432013-02-11 12:36:37 +000078 : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
Daniel Jasper32d28ee2013-01-29 21:01:14 +000079 CanBreakBefore(false), MustBreakBefore(false),
80 ClosesTemplateDeclaration(false), MatchingParen(NULL),
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +000081 ParameterCount(0), TotalLength(FormatTok->TokenLength),
Manuel Klimeke573c3f2013-05-22 12:51:29 +000082 UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
Daniel Jasper431f5912013-05-28 08:33:00 +000083 LongestObjCSelectorName(0), Parent(NULL), FakeRParens(0),
84 LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false) {}
Daniel Jasper32d28ee2013-01-29 21:01:14 +000085
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +000086 bool is(tok::TokenKind Kind) const { return FormatTok->Tok.is(Kind); }
Alexander Kornienkoe74de282013-03-13 14:41:29 +000087
88 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
89 return is(K1) || is(K2);
90 }
91
92 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
93 return is(K1) || is(K2) || is(K3);
94 }
95
96 bool isOneOf(
97 tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
98 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
99 tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
100 tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
101 tok::TokenKind K10 = tok::NUM_TOKENS,
102 tok::TokenKind K11 = tok::NUM_TOKENS,
103 tok::TokenKind K12 = tok::NUM_TOKENS) const {
104 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
105 is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
106 }
107
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +0000108 bool isNot(tok::TokenKind Kind) const { return FormatTok->Tok.isNot(Kind); }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000109
110 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +0000111 return FormatTok->Tok.isObjCAtKeyword(Kind);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000112 }
113
Alexander Kornienko94b748f2013-03-27 17:08:02 +0000114 bool isAccessSpecifier(bool ColonRequired = true) const {
115 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
116 (!ColonRequired ||
117 (!Children.empty() && Children[0].is(tok::colon)));
118 }
119
120 bool isObjCAccessSpecifier() const {
121 return is(tok::at) && !Children.empty() &&
122 (Children[0].isObjCAtKeyword(tok::objc_public) ||
123 Children[0].isObjCAtKeyword(tok::objc_protected) ||
124 Children[0].isObjCAtKeyword(tok::objc_package) ||
125 Children[0].isObjCAtKeyword(tok::objc_private));
126 }
127
Daniel Jasperac3223e2013-04-10 09:49:49 +0000128 /// \brief Returns whether \p Tok is ([{ or a template opening <.
129 bool opensScope() const;
130 /// \brief Returns whether \p Tok is )]} or a template opening >.
131 bool closesScope() const;
132
133 bool isUnaryOperator() const;
134 bool isBinaryOperator() const;
135 bool isTrailingComment() const;
136
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +0000137 FormatToken *FormatTok;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000138
139 TokenType Type;
140
Daniel Jasper729a7432013-02-11 12:36:37 +0000141 unsigned SpacesRequiredBefore;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000142 bool CanBreakBefore;
143 bool MustBreakBefore;
144
145 bool ClosesTemplateDeclaration;
146
147 AnnotatedToken *MatchingParen;
148
149 /// \brief Number of parameters, if this is "(", "[" or "<".
150 ///
151 /// This is initialized to 1 as we don't need to distinguish functions with
152 /// 0 parameters from functions with 1 parameter. Thus, we can simply count
153 /// the number of commas.
154 unsigned ParameterCount;
155
156 /// \brief The total length of the line up to and including this token.
157 unsigned TotalLength;
158
Manuel Klimeke573c3f2013-05-22 12:51:29 +0000159 /// \brief The length of following tokens until the next natural split point,
160 /// or the next token that can be broken.
161 unsigned UnbreakableTailLength;
162
Daniel Jasper01786732013-02-04 07:21:18 +0000163 // FIXME: Come up with a 'cleaner' concept.
164 /// \brief The binding strength of a token. This is a combined value of
165 /// operator precedence, parenthesis nesting, etc.
166 unsigned BindingStrength;
167
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000168 /// \brief Penalty for inserting a line break before this token.
169 unsigned SplitPenalty;
170
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000171 /// \brief If this is the first ObjC selector name in an ObjC method
172 /// definition or call, this contains the length of the longest name.
173 unsigned LongestObjCSelectorName;
174
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000175 std::vector<AnnotatedToken> Children;
176 AnnotatedToken *Parent;
177
Daniel Jasperbf71ba22013-04-08 20:33:42 +0000178 /// \brief Stores the number of required fake parentheses and the
179 /// corresponding operator precedence.
180 ///
181 /// If multiple fake parentheses start at a token, this vector stores them in
182 /// reverse order, i.e. inner fake parenthesis first.
183 SmallVector<prec::Level, 4> FakeLParens;
Daniel Jasper087387a2013-02-08 16:49:27 +0000184 /// \brief Insert this many fake ) after this token for correct indentation.
Daniel Jasper29f123b2013-02-08 15:28:42 +0000185 unsigned FakeRParens;
186
Daniel Jasper24849712013-03-01 16:48:32 +0000187 /// \brief Is this the last "." or "->" in a builder-type call?
188 bool LastInChainOfCalls;
189
Daniel Jasper8ed9f2b2013-04-03 13:36:17 +0000190 /// \brief Is this token part of a \c DeclStmt defining multiple variables?
191 ///
192 /// Only set if \c Type == \c TT_StartOfName.
193 bool PartOfMultiVariableDeclStmt;
194
Daniel Jasperac3223e2013-04-10 09:49:49 +0000195 /// \brief Returns the previous token ignoring comments.
196 AnnotatedToken *getPreviousNoneComment() const;
197
198 /// \brief Returns the next token ignoring comments.
199 const AnnotatedToken *getNextNoneComment() const;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000200};
201
202class AnnotatedLine {
203public:
204 AnnotatedLine(const UnwrappedLine &Line)
205 : First(Line.Tokens.front()), Level(Line.Level),
206 InPPDirective(Line.InPPDirective),
Daniel Jasper53e72cd2013-05-06 08:27:33 +0000207 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
208 StartsDefinition(false) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000209 assert(!Line.Tokens.empty());
210 AnnotatedToken *Current = &First;
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +0000211 for (std::list<FormatToken *>::const_iterator I = ++Line.Tokens.begin(),
212 E = Line.Tokens.end();
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000213 I != E; ++I) {
214 Current->Children.push_back(AnnotatedToken(*I));
215 Current->Children[0].Parent = Current;
216 Current = &Current->Children[0];
217 }
218 Last = Current;
219 }
220 AnnotatedLine(const AnnotatedLine &Other)
221 : First(Other.First), Type(Other.Type), Level(Other.Level),
222 InPPDirective(Other.InPPDirective),
Daniel Jasper3c08a812013-02-24 18:54:32 +0000223 MustBeDeclaration(Other.MustBeDeclaration),
Daniel Jasper53e72cd2013-05-06 08:27:33 +0000224 MightBeFunctionDecl(Other.MightBeFunctionDecl),
225 StartsDefinition(Other.StartsDefinition) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000226 Last = &First;
227 while (!Last->Children.empty()) {
228 Last->Children[0].Parent = Last;
229 Last = &Last->Children[0];
230 }
231 }
232
233 AnnotatedToken First;
234 AnnotatedToken *Last;
235
236 LineType Type;
237 unsigned Level;
238 bool InPPDirective;
239 bool MustBeDeclaration;
Daniel Jasper3c08a812013-02-24 18:54:32 +0000240 bool MightBeFunctionDecl;
Daniel Jasper53e72cd2013-05-06 08:27:33 +0000241 bool StartsDefinition;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000242};
243
244inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
Manuel Klimekdcb3f2a2013-05-28 13:42:28 +0000245 return getBinOpPrecedence(Tok.FormatTok->Tok.getKind(), true, true);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000246}
247
248/// \brief Determines extra information about the tokens comprising an
249/// \c UnwrappedLine.
250class TokenAnnotator {
251public:
Nico Weberc2e6d2a2013-02-11 15:32:15 +0000252 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
253 IdentifierInfo &Ident_in)
254 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000255 }
256
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000257 void annotate(AnnotatedLine &Line);
258 void calculateFormattingInformation(AnnotatedLine &Line);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000259
260private:
261 /// \brief Calculate the penalty for splitting before \c Tok.
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000262 unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000263
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000264 bool spaceRequiredBetween(const AnnotatedLine &Line,
265 const AnnotatedToken &Left,
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000266 const AnnotatedToken &Right);
267
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000268 bool spaceRequiredBefore(const AnnotatedLine &Line,
269 const AnnotatedToken &Tok);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000270
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000271 bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000272
Daniel Jasperbf71ba22013-04-08 20:33:42 +0000273 void printDebugInfo(const AnnotatedLine &Line);
274
Manuel Klimeke573c3f2013-05-22 12:51:29 +0000275 void calculateUnbreakableTailLengths(AnnotatedLine &Line);
276
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000277 const FormatStyle &Style;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000278 SourceManager &SourceMgr;
279 Lexer &Lex;
Nico Weberc2e6d2a2013-02-11 15:32:15 +0000280
281 // Contextual keywords:
282 IdentifierInfo &Ident_in;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000283};
284
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000285} // end namespace format
286} // end namespace clang
287
288#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H