blob: 24d4c59e61adc6392e51f3b3f94eca5fbf70015a [file] [log] [blame]
//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
17#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
18
19#include "clang/Basic/OperatorPrecedence.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000020#include "clang/Format/Format.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000021#include "clang/Lex/Lexer.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000022#include "llvm/ADT/OwningPtr.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000023
24namespace clang {
25namespace format {
26
/// \brief Formatting-specific classification of a token.
///
/// A value of this type is stored in \c FormatToken::Type, which the
/// \c FormatToken constructor initializes to \c TT_Unknown.
///
/// The enumerators rely on their implicit values, so new entries must not
/// be inserted in a way that callers depending on the numeric values would
/// notice.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_CtorInitializerComma,
  TT_DesignatedInitializerPeriod,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_FunctionTypeLParen,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCDictLiteral,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperator,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingReturnArrow,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown
};
62
/// \brief Represents what type of block a set of braces opens.
///
/// Stored in \c FormatToken::BlockKind, which the \c FormatToken constructor
/// initializes to \c BK_Unknown.
enum BraceBlockKind {
  BK_Unknown,
  BK_Block,
  BK_BracedInit
};
69
/// \brief The packing kind of a function's parameters.
///
/// Stored in \c FormatToken::PackingKind, which the \c FormatToken
/// constructor initializes to \c PPK_Inconclusive.
enum ParameterPackingKind {
  PPK_BinPacked,
  PPK_OnePerLine,
  PPK_Inconclusive
};
76
Daniel Jasperd4a03db2013-08-22 15:00:41 +000077class TokenRole;
78
Alexander Kornienko3b711552013-06-03 16:45:03 +000079/// \brief A wrapper around a \c Token storing information about the
80/// whitespace characters preceeding it.
81struct FormatToken {
82 FormatToken()
Alexander Kornienko4b762a92013-09-02 13:58:14 +000083 : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
84 CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
85 IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
Alexander Kornienkodcc0c5b2013-08-29 17:32:57 +000086 BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
87 CanBreakBefore(false), ClosesTemplateDeclaration(false),
88 ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
89 UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
90 LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
Alexander Kornienko3b711552013-06-03 16:45:03 +000091 PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
92 Next(NULL) {}
93
94 /// \brief The \c Token.
95 Token Tok;
96
97 /// \brief The number of newlines immediately before the \c Token.
98 ///
99 /// This can be used to determine what the user wrote in the original code
100 /// and thereby e.g. leave an empty line between two function definitions.
101 unsigned NewlinesBefore;
102
103 /// \brief Whether there is at least one unescaped newline before the \c
104 /// Token.
105 bool HasUnescapedNewline;
106
107 /// \brief The range of the whitespace immediately preceeding the \c Token.
108 SourceRange WhitespaceRange;
109
110 /// \brief The offset just past the last '\n' in this token's leading
111 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
112 unsigned LastNewlineOffset;
113
Alexander Kornienko00895102013-06-05 14:09:10 +0000114 /// \brief The length of the non-whitespace parts of the token in CodePoints.
115 /// We need this to correctly measure number of columns a token spans.
116 unsigned CodePointCount;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000117
Alexander Kornienko4b762a92013-09-02 13:58:14 +0000118 /// \brief Contains the number of code points in the first line of a
119 /// multi-line string literal or comment. Zero if there's no newline in the
120 /// token.
121 unsigned CodePointsInFirstLine;
122
123 /// \brief Contains the number of code points in the last line of a
124 /// multi-line string literal or comment. Can be zero for line comments.
125 unsigned CodePointsInLastLine;
126
127 /// \brief Returns \c true if the token text contains newlines (escaped or
128 /// not).
129 bool isMultiline() const { return CodePointsInFirstLine != 0; }
130
Alexander Kornienko3b711552013-06-03 16:45:03 +0000131 /// \brief Indicates that this is the first token.
132 bool IsFirst;
133
134 /// \brief Whether there must be a line break before this token.
135 ///
136 /// This happens for example when a preprocessor directive ended directly
137 /// before the token.
138 bool MustBreakBefore;
139
140 /// \brief Returns actual token start location without leading escaped
141 /// newlines and whitespace.
142 ///
143 /// This can be different to Tok.getLocation(), which includes leading escaped
144 /// newlines.
145 SourceLocation getStartOfNonWhitespace() const {
146 return WhitespaceRange.getEnd();
147 }
148
149 /// \brief The raw text of the token.
150 ///
151 /// Contains the raw token text without leading whitespace and without leading
152 /// escaped newlines.
153 StringRef TokenText;
154
Daniel Jasper561211d2013-07-16 20:28:33 +0000155 /// \brief Set to \c true if this token is an unterminated literal.
156 bool IsUnterminatedLiteral;
157
Daniel Jasper0de1c4d2013-07-09 09:06:29 +0000158 /// \brief Contains the kind of block if this token is a brace.
159 BraceBlockKind BlockKind;
160
Alexander Kornienko3b711552013-06-03 16:45:03 +0000161 TokenType Type;
162
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000163 /// \brief The number of spaces that should be inserted before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000164 unsigned SpacesRequiredBefore;
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000165
166 /// \brief \c true if it is allowed to break before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000167 bool CanBreakBefore;
168
169 bool ClosesTemplateDeclaration;
170
171 /// \brief Number of parameters, if this is "(", "[" or "<".
172 ///
173 /// This is initialized to 1 as we don't need to distinguish functions with
174 /// 0 parameters from functions with 1 parameter. Thus, we can simply count
175 /// the number of commas.
176 unsigned ParameterCount;
177
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000178 /// \brief A token can have a special role that can carry extra information
179 /// about the token's formatting.
180 llvm::OwningPtr<TokenRole> Role;
181
Daniel Jasperc7bd68f2013-07-10 14:02:49 +0000182 /// \brief If this is an opening parenthesis, how are the parameters packed?
183 ParameterPackingKind PackingKind;
184
Manuel Klimekc41e8192013-08-29 15:21:40 +0000185 /// \brief The total length of the unwrapped line up to and including this
186 /// token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000187 unsigned TotalLength;
188
Manuel Klimekc41e8192013-08-29 15:21:40 +0000189 /// \brief The original column of this token, including expanded tabs.
190 /// The configured IndentWidth is used as tab width. Only tabs in whitespace
191 /// are expanded.
192 /// FIXME: This is currently only used on the first token of an unwrapped
193 /// line, and the implementation is not correct for other tokens (see the
194 /// FIXMEs in FormatTokenLexer::getNextToken()).
195 unsigned OriginalColumn;
196
Alexander Kornienko3b711552013-06-03 16:45:03 +0000197 /// \brief The length of following tokens until the next natural split point,
198 /// or the next token that can be broken.
199 unsigned UnbreakableTailLength;
200
201 // FIXME: Come up with a 'cleaner' concept.
202 /// \brief The binding strength of a token. This is a combined value of
203 /// operator precedence, parenthesis nesting, etc.
204 unsigned BindingStrength;
205
206 /// \brief Penalty for inserting a line break before this token.
207 unsigned SplitPenalty;
208
209 /// \brief If this is the first ObjC selector name in an ObjC method
210 /// definition or call, this contains the length of the longest name.
211 unsigned LongestObjCSelectorName;
212
213 /// \brief Stores the number of required fake parentheses and the
214 /// corresponding operator precedence.
215 ///
216 /// If multiple fake parentheses start at a token, this vector stores them in
217 /// reverse order, i.e. inner fake parenthesis first.
218 SmallVector<prec::Level, 4> FakeLParens;
219 /// \brief Insert this many fake ) after this token for correct indentation.
220 unsigned FakeRParens;
221
222 /// \brief Is this the last "." or "->" in a builder-type call?
223 bool LastInChainOfCalls;
224
225 /// \brief Is this token part of a \c DeclStmt defining multiple variables?
226 ///
227 /// Only set if \c Type == \c TT_StartOfName.
228 bool PartOfMultiVariableDeclStmt;
229
230 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
231
232 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
233 return is(K1) || is(K2);
234 }
235
236 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
237 return is(K1) || is(K2) || is(K3);
238 }
239
240 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
241 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
242 tok::TokenKind K6 = tok::NUM_TOKENS,
243 tok::TokenKind K7 = tok::NUM_TOKENS,
244 tok::TokenKind K8 = tok::NUM_TOKENS,
245 tok::TokenKind K9 = tok::NUM_TOKENS,
246 tok::TokenKind K10 = tok::NUM_TOKENS,
247 tok::TokenKind K11 = tok::NUM_TOKENS,
248 tok::TokenKind K12 = tok::NUM_TOKENS) const {
249 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
250 is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
251 }
252
253 bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); }
254
255 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
256 return Tok.isObjCAtKeyword(Kind);
257 }
258
259 bool isAccessSpecifier(bool ColonRequired = true) const {
260 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
261 (!ColonRequired || (Next && Next->is(tok::colon)));
262 }
263
264 bool isObjCAccessSpecifier() const {
265 return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
266 Next->isObjCAtKeyword(tok::objc_protected) ||
267 Next->isObjCAtKeyword(tok::objc_package) ||
268 Next->isObjCAtKeyword(tok::objc_private));
269 }
270
271 /// \brief Returns whether \p Tok is ([{ or a template opening <.
272 bool opensScope() const {
273 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
274 Type == TT_TemplateOpener;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000275 }
Nico Weber0e5a8882013-06-25 19:25:12 +0000276 /// \brief Returns whether \p Tok is )]} or a template closing >.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000277 bool closesScope() const {
278 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
279 Type == TT_TemplateCloser;
280 }
281
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000282 /// \brief Returns \c true if this is a "." or "->" accessing a member.
283 bool isMemberAccess() const {
284 return isOneOf(tok::arrow, tok::period) &&
285 Type != TT_DesignatedInitializerPeriod;
286 }
287
Alexander Kornienko3b711552013-06-03 16:45:03 +0000288 bool isUnaryOperator() const {
289 switch (Tok.getKind()) {
290 case tok::plus:
291 case tok::plusplus:
292 case tok::minus:
293 case tok::minusminus:
294 case tok::exclaim:
295 case tok::tilde:
296 case tok::kw_sizeof:
297 case tok::kw_alignof:
298 return true;
299 default:
300 return false;
301 }
302 }
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000303
Alexander Kornienko3b711552013-06-03 16:45:03 +0000304 bool isBinaryOperator() const {
305 // Comma is a binary operator, but does not behave as such wrt. formatting.
306 return getPrecedence() > prec::Comma;
307 }
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000308
Alexander Kornienko3b711552013-06-03 16:45:03 +0000309 bool isTrailingComment() const {
310 return is(tok::comment) && (!Next || Next->NewlinesBefore > 0);
311 }
312
313 prec::Level getPrecedence() const {
314 return getBinOpPrecedence(Tok.getKind(), true, true);
315 }
316
317 /// \brief Returns the previous token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000318 FormatToken *getPreviousNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000319 FormatToken *Tok = Previous;
320 while (Tok != NULL && Tok->is(tok::comment))
321 Tok = Tok->Previous;
322 return Tok;
323 }
324
325 /// \brief Returns the next token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000326 const FormatToken *getNextNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000327 const FormatToken *Tok = Next;
328 while (Tok != NULL && Tok->is(tok::comment))
329 Tok = Tok->Next;
330 return Tok;
331 }
332
333 FormatToken *MatchingParen;
334
335 FormatToken *Previous;
336 FormatToken *Next;
337
338private:
339 // Disallow copying.
Craig Topper53d4f312013-07-01 04:07:34 +0000340 FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION;
341 void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000342};
343
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000344class ContinuationIndenter;
345struct LineState;
346
347class TokenRole {
348public:
349 TokenRole(const FormatStyle &Style) : Style(Style) {}
350 virtual ~TokenRole();
351
352 /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
353 /// this function precomputes required information for formatting.
354 virtual void precomputeFormattingInfos(const FormatToken *Token);
355
356 /// \brief Apply the special formatting that the given role demands.
357 ///
358 /// Continues formatting from \p State leaving indentation to \p Indenter and
359 /// returns the total penalty that this formatting incurs.
360 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
361 bool DryRun) {
362 return 0;
363 }
364
365 /// \brief Notifies the \c Role that a comma was found.
366 virtual void CommaFound(const FormatToken *Token) {}
367
368protected:
369 const FormatStyle &Style;
370};
371
372class CommaSeparatedList : public TokenRole {
373public:
374 CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {}
375
376 virtual void precomputeFormattingInfos(const FormatToken *Token);
377
378 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
379 bool DryRun);
380
381 /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
382 virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
383
384private:
385 /// \brief A struct that holds information on how to format a given list with
386 /// a specific number of columns.
387 struct ColumnFormat {
388 /// \brief The number of columns to use.
389 unsigned Columns;
390
391 /// \brief The total width in characters.
392 unsigned TotalWidth;
393
394 /// \brief The number of lines required for this format.
395 unsigned LineCount;
396
397 /// \brief The size of each column in characters.
398 SmallVector<unsigned, 8> ColumnSizes;
399 };
400
401 /// \brief Calculate which \c ColumnFormat fits best into
402 /// \p RemainingCharacters.
403 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
404
405 /// \brief The ordered \c FormatTokens making up the commas of this list.
406 SmallVector<const FormatToken *, 8> Commas;
407
408 /// \brief The length of each of the list's items in characters including the
409 /// trailing comma.
410 SmallVector<unsigned, 8> ItemLengths;
411
412 /// \brief Precomputed formats that can be used for this list.
413 SmallVector<ColumnFormat, 4> Formats;
414};
415
Alexander Kornienko3b711552013-06-03 16:45:03 +0000416} // namespace format
417} // namespace clang
418
419#endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H