blob: 950938395e5d93862eb874e1f3f79b9f6490b3bf [file] [log] [blame]
//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
17#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
18
19#include "clang/Basic/OperatorPrecedence.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000020#include "clang/Format/Format.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000021#include "clang/Lex/Lexer.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000022#include "llvm/ADT/OwningPtr.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000023
24namespace clang {
25namespace format {
26
/// \brief Annotation describing the role a token plays for formatting.
///
/// Stored in \c FormatToken::Type. A default-constructed \c FormatToken
/// carries \c TT_Unknown. The enumerators are unscoped and rely on implicit
/// numbering, so the order below must not be changed casually.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_CtorInitializerComma,
  TT_DesignatedInitializerPeriod,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_FunctionTypeLParen,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCDictLiteral,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperator,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingReturnArrow,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown
};
62
/// \brief Represents what type of block a set of braces opens.
///
/// Stored in \c FormatToken::BlockKind; a default-constructed token carries
/// \c BK_Unknown.
enum BraceBlockKind {
  BK_Unknown,
  BK_Block,
  BK_BracedInit
};
69
/// \brief The packing kind of a function's parameters.
///
/// Stored in \c FormatToken::PackingKind; a default-constructed token carries
/// \c PPK_Inconclusive.
enum ParameterPackingKind {
  PPK_BinPacked,
  PPK_OnePerLine,
  PPK_Inconclusive
};
76
Daniel Jasperd4a03db2013-08-22 15:00:41 +000077class TokenRole;
78
Alexander Kornienko3b711552013-06-03 16:45:03 +000079/// \brief A wrapper around a \c Token storing information about the
80/// whitespace characters preceeding it.
81struct FormatToken {
82 FormatToken()
83 : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
Alexander Kornienko54e6c9d2013-06-07 17:45:07 +000084 CodePointCount(0), IsFirst(false), MustBreakBefore(false),
Daniel Jasper561211d2013-07-16 20:28:33 +000085 IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown),
86 SpacesRequiredBefore(0), CanBreakBefore(false),
87 ClosesTemplateDeclaration(false), ParameterCount(0),
88 PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
89 BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
90 FakeRParens(0), LastInChainOfCalls(false),
Alexander Kornienko3b711552013-06-03 16:45:03 +000091 PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
92 Next(NULL) {}
93
94 /// \brief The \c Token.
95 Token Tok;
96
97 /// \brief The number of newlines immediately before the \c Token.
98 ///
99 /// This can be used to determine what the user wrote in the original code
100 /// and thereby e.g. leave an empty line between two function definitions.
101 unsigned NewlinesBefore;
102
103 /// \brief Whether there is at least one unescaped newline before the \c
104 /// Token.
105 bool HasUnescapedNewline;
106
107 /// \brief The range of the whitespace immediately preceeding the \c Token.
108 SourceRange WhitespaceRange;
109
110 /// \brief The offset just past the last '\n' in this token's leading
111 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
112 unsigned LastNewlineOffset;
113
Alexander Kornienko00895102013-06-05 14:09:10 +0000114 /// \brief The length of the non-whitespace parts of the token in CodePoints.
115 /// We need this to correctly measure number of columns a token spans.
116 unsigned CodePointCount;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000117
118 /// \brief Indicates that this is the first token.
119 bool IsFirst;
120
121 /// \brief Whether there must be a line break before this token.
122 ///
123 /// This happens for example when a preprocessor directive ended directly
124 /// before the token.
125 bool MustBreakBefore;
126
127 /// \brief Returns actual token start location without leading escaped
128 /// newlines and whitespace.
129 ///
130 /// This can be different to Tok.getLocation(), which includes leading escaped
131 /// newlines.
132 SourceLocation getStartOfNonWhitespace() const {
133 return WhitespaceRange.getEnd();
134 }
135
136 /// \brief The raw text of the token.
137 ///
138 /// Contains the raw token text without leading whitespace and without leading
139 /// escaped newlines.
140 StringRef TokenText;
141
Daniel Jasper561211d2013-07-16 20:28:33 +0000142 /// \brief Set to \c true if this token is an unterminated literal.
143 bool IsUnterminatedLiteral;
144
Daniel Jasper0de1c4d2013-07-09 09:06:29 +0000145 /// \brief Contains the kind of block if this token is a brace.
146 BraceBlockKind BlockKind;
147
Alexander Kornienko3b711552013-06-03 16:45:03 +0000148 TokenType Type;
149
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000150 /// \brief The number of spaces that should be inserted before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000151 unsigned SpacesRequiredBefore;
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000152
153 /// \brief \c true if it is allowed to break before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000154 bool CanBreakBefore;
155
156 bool ClosesTemplateDeclaration;
157
158 /// \brief Number of parameters, if this is "(", "[" or "<".
159 ///
160 /// This is initialized to 1 as we don't need to distinguish functions with
161 /// 0 parameters from functions with 1 parameter. Thus, we can simply count
162 /// the number of commas.
163 unsigned ParameterCount;
164
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000165 /// \brief A token can have a special role that can carry extra information
166 /// about the token's formatting.
167 llvm::OwningPtr<TokenRole> Role;
168
Daniel Jasperc7bd68f2013-07-10 14:02:49 +0000169 /// \brief If this is an opening parenthesis, how are the parameters packed?
170 ParameterPackingKind PackingKind;
171
Manuel Klimekc41e8192013-08-29 15:21:40 +0000172 /// \brief The total length of the unwrapped line up to and including this
173 /// token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000174 unsigned TotalLength;
175
Manuel Klimekc41e8192013-08-29 15:21:40 +0000176 /// \brief The original column of this token, including expanded tabs.
177 /// The configured IndentWidth is used as tab width. Only tabs in whitespace
178 /// are expanded.
179 /// FIXME: This is currently only used on the first token of an unwrapped
180 /// line, and the implementation is not correct for other tokens (see the
181 /// FIXMEs in FormatTokenLexer::getNextToken()).
182 unsigned OriginalColumn;
183
Alexander Kornienko3b711552013-06-03 16:45:03 +0000184 /// \brief The length of following tokens until the next natural split point,
185 /// or the next token that can be broken.
186 unsigned UnbreakableTailLength;
187
188 // FIXME: Come up with a 'cleaner' concept.
189 /// \brief The binding strength of a token. This is a combined value of
190 /// operator precedence, parenthesis nesting, etc.
191 unsigned BindingStrength;
192
193 /// \brief Penalty for inserting a line break before this token.
194 unsigned SplitPenalty;
195
196 /// \brief If this is the first ObjC selector name in an ObjC method
197 /// definition or call, this contains the length of the longest name.
198 unsigned LongestObjCSelectorName;
199
200 /// \brief Stores the number of required fake parentheses and the
201 /// corresponding operator precedence.
202 ///
203 /// If multiple fake parentheses start at a token, this vector stores them in
204 /// reverse order, i.e. inner fake parenthesis first.
205 SmallVector<prec::Level, 4> FakeLParens;
206 /// \brief Insert this many fake ) after this token for correct indentation.
207 unsigned FakeRParens;
208
209 /// \brief Is this the last "." or "->" in a builder-type call?
210 bool LastInChainOfCalls;
211
212 /// \brief Is this token part of a \c DeclStmt defining multiple variables?
213 ///
214 /// Only set if \c Type == \c TT_StartOfName.
215 bool PartOfMultiVariableDeclStmt;
216
217 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
218
219 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
220 return is(K1) || is(K2);
221 }
222
223 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
224 return is(K1) || is(K2) || is(K3);
225 }
226
227 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
228 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
229 tok::TokenKind K6 = tok::NUM_TOKENS,
230 tok::TokenKind K7 = tok::NUM_TOKENS,
231 tok::TokenKind K8 = tok::NUM_TOKENS,
232 tok::TokenKind K9 = tok::NUM_TOKENS,
233 tok::TokenKind K10 = tok::NUM_TOKENS,
234 tok::TokenKind K11 = tok::NUM_TOKENS,
235 tok::TokenKind K12 = tok::NUM_TOKENS) const {
236 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
237 is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
238 }
239
240 bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); }
241
242 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
243 return Tok.isObjCAtKeyword(Kind);
244 }
245
246 bool isAccessSpecifier(bool ColonRequired = true) const {
247 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
248 (!ColonRequired || (Next && Next->is(tok::colon)));
249 }
250
251 bool isObjCAccessSpecifier() const {
252 return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
253 Next->isObjCAtKeyword(tok::objc_protected) ||
254 Next->isObjCAtKeyword(tok::objc_package) ||
255 Next->isObjCAtKeyword(tok::objc_private));
256 }
257
258 /// \brief Returns whether \p Tok is ([{ or a template opening <.
259 bool opensScope() const {
260 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
261 Type == TT_TemplateOpener;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000262 }
Nico Weber0e5a8882013-06-25 19:25:12 +0000263 /// \brief Returns whether \p Tok is )]} or a template closing >.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000264 bool closesScope() const {
265 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
266 Type == TT_TemplateCloser;
267 }
268
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000269 /// \brief Returns \c true if this is a "." or "->" accessing a member.
270 bool isMemberAccess() const {
271 return isOneOf(tok::arrow, tok::period) &&
272 Type != TT_DesignatedInitializerPeriod;
273 }
274
Alexander Kornienko3b711552013-06-03 16:45:03 +0000275 bool isUnaryOperator() const {
276 switch (Tok.getKind()) {
277 case tok::plus:
278 case tok::plusplus:
279 case tok::minus:
280 case tok::minusminus:
281 case tok::exclaim:
282 case tok::tilde:
283 case tok::kw_sizeof:
284 case tok::kw_alignof:
285 return true;
286 default:
287 return false;
288 }
289 }
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000290
Alexander Kornienko3b711552013-06-03 16:45:03 +0000291 bool isBinaryOperator() const {
292 // Comma is a binary operator, but does not behave as such wrt. formatting.
293 return getPrecedence() > prec::Comma;
294 }
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000295
Alexander Kornienko3b711552013-06-03 16:45:03 +0000296 bool isTrailingComment() const {
297 return is(tok::comment) && (!Next || Next->NewlinesBefore > 0);
298 }
299
300 prec::Level getPrecedence() const {
301 return getBinOpPrecedence(Tok.getKind(), true, true);
302 }
303
304 /// \brief Returns the previous token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000305 FormatToken *getPreviousNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000306 FormatToken *Tok = Previous;
307 while (Tok != NULL && Tok->is(tok::comment))
308 Tok = Tok->Previous;
309 return Tok;
310 }
311
312 /// \brief Returns the next token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000313 const FormatToken *getNextNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000314 const FormatToken *Tok = Next;
315 while (Tok != NULL && Tok->is(tok::comment))
316 Tok = Tok->Next;
317 return Tok;
318 }
319
320 FormatToken *MatchingParen;
321
322 FormatToken *Previous;
323 FormatToken *Next;
324
325private:
326 // Disallow copying.
Craig Topper53d4f312013-07-01 04:07:34 +0000327 FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION;
328 void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000329};
330
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000331class ContinuationIndenter;
332struct LineState;
333
334class TokenRole {
335public:
336 TokenRole(const FormatStyle &Style) : Style(Style) {}
337 virtual ~TokenRole();
338
339 /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
340 /// this function precomputes required information for formatting.
341 virtual void precomputeFormattingInfos(const FormatToken *Token);
342
343 /// \brief Apply the special formatting that the given role demands.
344 ///
345 /// Continues formatting from \p State leaving indentation to \p Indenter and
346 /// returns the total penalty that this formatting incurs.
347 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
348 bool DryRun) {
349 return 0;
350 }
351
352 /// \brief Notifies the \c Role that a comma was found.
353 virtual void CommaFound(const FormatToken *Token) {}
354
355protected:
356 const FormatStyle &Style;
357};
358
359class CommaSeparatedList : public TokenRole {
360public:
361 CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {}
362
363 virtual void precomputeFormattingInfos(const FormatToken *Token);
364
365 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
366 bool DryRun);
367
368 /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
369 virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
370
371private:
372 /// \brief A struct that holds information on how to format a given list with
373 /// a specific number of columns.
374 struct ColumnFormat {
375 /// \brief The number of columns to use.
376 unsigned Columns;
377
378 /// \brief The total width in characters.
379 unsigned TotalWidth;
380
381 /// \brief The number of lines required for this format.
382 unsigned LineCount;
383
384 /// \brief The size of each column in characters.
385 SmallVector<unsigned, 8> ColumnSizes;
386 };
387
388 /// \brief Calculate which \c ColumnFormat fits best into
389 /// \p RemainingCharacters.
390 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
391
392 /// \brief The ordered \c FormatTokens making up the commas of this list.
393 SmallVector<const FormatToken *, 8> Commas;
394
395 /// \brief The length of each of the list's items in characters including the
396 /// trailing comma.
397 SmallVector<unsigned, 8> ItemLengths;
398
399 /// \brief Precomputed formats that can be used for this list.
400 SmallVector<ColumnFormat, 4> Formats;
401};
402
Alexander Kornienko3b711552013-06-03 16:45:03 +0000403} // namespace format
404} // namespace clang
405
406#endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H