blob: 6006ec87c03d9d33b13e1f5d393f4f3b915459c7 [file] [log] [blame]
//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
17#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
18
19#include "clang/Basic/OperatorPrecedence.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000020#include "clang/Format/Format.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000021#include "clang/Lex/Lexer.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000022#include "llvm/ADT/OwningPtr.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000023
24namespace clang {
25namespace format {
26
/// \brief Classification attached to a token (see \c FormatToken::Type).
///
/// The enumerators carry no explicit values, so they are numbered
/// consecutively from 0 in declaration order. \c TT_Unknown is the last
/// enumerator and is the value a default-constructed \c FormatToken starts
/// with.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_CtorInitializerComma,
  TT_DesignatedInitializerPeriod,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_FunctionTypeLParen,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCDictLiteral,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperator,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingReturnArrow,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown
};
62
// Represents what type of block a set of braces opens.
//
// BK_Unknown is the first enumerator (value 0) and is the value a
// default-constructed FormatToken starts with.
enum BraceBlockKind {
  BK_Unknown,
  BK_Block,
  BK_BracedInit
};
69
// The packing kind of a function's parameters.
//
// PPK_Inconclusive is the value a default-constructed FormatToken starts
// with.
enum ParameterPackingKind {
  PPK_BinPacked,
  PPK_OnePerLine,
  PPK_Inconclusive
};
76
Daniel Jasperd4a03db2013-08-22 15:00:41 +000077class TokenRole;
78
Alexander Kornienko3b711552013-06-03 16:45:03 +000079/// \brief A wrapper around a \c Token storing information about the
80/// whitespace characters preceeding it.
81struct FormatToken {
82 FormatToken()
Alexander Kornienkodcc0c5b2013-08-29 17:32:57 +000083 : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false),
84 LastNewlineOffset(0), CodePointCount(0), IsFirst(false),
85 MustBreakBefore(false), IsUnterminatedLiteral(false),
86 BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
87 CanBreakBefore(false), ClosesTemplateDeclaration(false),
88 ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
89 UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
90 LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
Alexander Kornienko3b711552013-06-03 16:45:03 +000091 PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
92 Next(NULL) {}
93
94 /// \brief The \c Token.
95 Token Tok;
96
97 /// \brief The number of newlines immediately before the \c Token.
98 ///
99 /// This can be used to determine what the user wrote in the original code
100 /// and thereby e.g. leave an empty line between two function definitions.
101 unsigned NewlinesBefore;
102
103 /// \brief Whether there is at least one unescaped newline before the \c
104 /// Token.
105 bool HasUnescapedNewline;
106
Alexander Kornienkodcc0c5b2013-08-29 17:32:57 +0000107 /// \brief Whether the token text contains newlines (escaped or not).
108 bool IsMultiline;
109
Alexander Kornienko3b711552013-06-03 16:45:03 +0000110 /// \brief The range of the whitespace immediately preceeding the \c Token.
111 SourceRange WhitespaceRange;
112
113 /// \brief The offset just past the last '\n' in this token's leading
114 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
115 unsigned LastNewlineOffset;
116
Alexander Kornienko00895102013-06-05 14:09:10 +0000117 /// \brief The length of the non-whitespace parts of the token in CodePoints.
118 /// We need this to correctly measure number of columns a token spans.
119 unsigned CodePointCount;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000120
121 /// \brief Indicates that this is the first token.
122 bool IsFirst;
123
124 /// \brief Whether there must be a line break before this token.
125 ///
126 /// This happens for example when a preprocessor directive ended directly
127 /// before the token.
128 bool MustBreakBefore;
129
130 /// \brief Returns actual token start location without leading escaped
131 /// newlines and whitespace.
132 ///
133 /// This can be different to Tok.getLocation(), which includes leading escaped
134 /// newlines.
135 SourceLocation getStartOfNonWhitespace() const {
136 return WhitespaceRange.getEnd();
137 }
138
139 /// \brief The raw text of the token.
140 ///
141 /// Contains the raw token text without leading whitespace and without leading
142 /// escaped newlines.
143 StringRef TokenText;
144
Daniel Jasper561211d2013-07-16 20:28:33 +0000145 /// \brief Set to \c true if this token is an unterminated literal.
146 bool IsUnterminatedLiteral;
147
Daniel Jasper0de1c4d2013-07-09 09:06:29 +0000148 /// \brief Contains the kind of block if this token is a brace.
149 BraceBlockKind BlockKind;
150
Alexander Kornienko3b711552013-06-03 16:45:03 +0000151 TokenType Type;
152
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000153 /// \brief The number of spaces that should be inserted before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000154 unsigned SpacesRequiredBefore;
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000155
156 /// \brief \c true if it is allowed to break before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000157 bool CanBreakBefore;
158
159 bool ClosesTemplateDeclaration;
160
161 /// \brief Number of parameters, if this is "(", "[" or "<".
162 ///
163 /// This is initialized to 1 as we don't need to distinguish functions with
164 /// 0 parameters from functions with 1 parameter. Thus, we can simply count
165 /// the number of commas.
166 unsigned ParameterCount;
167
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000168 /// \brief A token can have a special role that can carry extra information
169 /// about the token's formatting.
170 llvm::OwningPtr<TokenRole> Role;
171
Daniel Jasperc7bd68f2013-07-10 14:02:49 +0000172 /// \brief If this is an opening parenthesis, how are the parameters packed?
173 ParameterPackingKind PackingKind;
174
Manuel Klimekc41e8192013-08-29 15:21:40 +0000175 /// \brief The total length of the unwrapped line up to and including this
176 /// token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000177 unsigned TotalLength;
178
Manuel Klimekc41e8192013-08-29 15:21:40 +0000179 /// \brief The original column of this token, including expanded tabs.
180 /// The configured IndentWidth is used as tab width. Only tabs in whitespace
181 /// are expanded.
182 /// FIXME: This is currently only used on the first token of an unwrapped
183 /// line, and the implementation is not correct for other tokens (see the
184 /// FIXMEs in FormatTokenLexer::getNextToken()).
185 unsigned OriginalColumn;
186
Alexander Kornienko3b711552013-06-03 16:45:03 +0000187 /// \brief The length of following tokens until the next natural split point,
188 /// or the next token that can be broken.
189 unsigned UnbreakableTailLength;
190
191 // FIXME: Come up with a 'cleaner' concept.
192 /// \brief The binding strength of a token. This is a combined value of
193 /// operator precedence, parenthesis nesting, etc.
194 unsigned BindingStrength;
195
196 /// \brief Penalty for inserting a line break before this token.
197 unsigned SplitPenalty;
198
199 /// \brief If this is the first ObjC selector name in an ObjC method
200 /// definition or call, this contains the length of the longest name.
201 unsigned LongestObjCSelectorName;
202
203 /// \brief Stores the number of required fake parentheses and the
204 /// corresponding operator precedence.
205 ///
206 /// If multiple fake parentheses start at a token, this vector stores them in
207 /// reverse order, i.e. inner fake parenthesis first.
208 SmallVector<prec::Level, 4> FakeLParens;
209 /// \brief Insert this many fake ) after this token for correct indentation.
210 unsigned FakeRParens;
211
212 /// \brief Is this the last "." or "->" in a builder-type call?
213 bool LastInChainOfCalls;
214
215 /// \brief Is this token part of a \c DeclStmt defining multiple variables?
216 ///
217 /// Only set if \c Type == \c TT_StartOfName.
218 bool PartOfMultiVariableDeclStmt;
219
220 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
221
222 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
223 return is(K1) || is(K2);
224 }
225
226 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
227 return is(K1) || is(K2) || is(K3);
228 }
229
230 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
231 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
232 tok::TokenKind K6 = tok::NUM_TOKENS,
233 tok::TokenKind K7 = tok::NUM_TOKENS,
234 tok::TokenKind K8 = tok::NUM_TOKENS,
235 tok::TokenKind K9 = tok::NUM_TOKENS,
236 tok::TokenKind K10 = tok::NUM_TOKENS,
237 tok::TokenKind K11 = tok::NUM_TOKENS,
238 tok::TokenKind K12 = tok::NUM_TOKENS) const {
239 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
240 is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
241 }
242
243 bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); }
244
245 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
246 return Tok.isObjCAtKeyword(Kind);
247 }
248
249 bool isAccessSpecifier(bool ColonRequired = true) const {
250 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
251 (!ColonRequired || (Next && Next->is(tok::colon)));
252 }
253
254 bool isObjCAccessSpecifier() const {
255 return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
256 Next->isObjCAtKeyword(tok::objc_protected) ||
257 Next->isObjCAtKeyword(tok::objc_package) ||
258 Next->isObjCAtKeyword(tok::objc_private));
259 }
260
261 /// \brief Returns whether \p Tok is ([{ or a template opening <.
262 bool opensScope() const {
263 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
264 Type == TT_TemplateOpener;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000265 }
Nico Weber0e5a8882013-06-25 19:25:12 +0000266 /// \brief Returns whether \p Tok is )]} or a template closing >.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000267 bool closesScope() const {
268 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
269 Type == TT_TemplateCloser;
270 }
271
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000272 /// \brief Returns \c true if this is a "." or "->" accessing a member.
273 bool isMemberAccess() const {
274 return isOneOf(tok::arrow, tok::period) &&
275 Type != TT_DesignatedInitializerPeriod;
276 }
277
Alexander Kornienko3b711552013-06-03 16:45:03 +0000278 bool isUnaryOperator() const {
279 switch (Tok.getKind()) {
280 case tok::plus:
281 case tok::plusplus:
282 case tok::minus:
283 case tok::minusminus:
284 case tok::exclaim:
285 case tok::tilde:
286 case tok::kw_sizeof:
287 case tok::kw_alignof:
288 return true;
289 default:
290 return false;
291 }
292 }
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000293
Alexander Kornienko3b711552013-06-03 16:45:03 +0000294 bool isBinaryOperator() const {
295 // Comma is a binary operator, but does not behave as such wrt. formatting.
296 return getPrecedence() > prec::Comma;
297 }
Daniel Jasperd3fef0f2013-08-27 14:24:43 +0000298
Alexander Kornienko3b711552013-06-03 16:45:03 +0000299 bool isTrailingComment() const {
300 return is(tok::comment) && (!Next || Next->NewlinesBefore > 0);
301 }
302
303 prec::Level getPrecedence() const {
304 return getBinOpPrecedence(Tok.getKind(), true, true);
305 }
306
307 /// \brief Returns the previous token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000308 FormatToken *getPreviousNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000309 FormatToken *Tok = Previous;
310 while (Tok != NULL && Tok->is(tok::comment))
311 Tok = Tok->Previous;
312 return Tok;
313 }
314
315 /// \brief Returns the next token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000316 const FormatToken *getNextNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000317 const FormatToken *Tok = Next;
318 while (Tok != NULL && Tok->is(tok::comment))
319 Tok = Tok->Next;
320 return Tok;
321 }
322
323 FormatToken *MatchingParen;
324
325 FormatToken *Previous;
326 FormatToken *Next;
327
328private:
329 // Disallow copying.
Craig Topper53d4f312013-07-01 04:07:34 +0000330 FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION;
331 void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000332};
333
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000334class ContinuationIndenter;
335struct LineState;
336
337class TokenRole {
338public:
339 TokenRole(const FormatStyle &Style) : Style(Style) {}
340 virtual ~TokenRole();
341
342 /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
343 /// this function precomputes required information for formatting.
344 virtual void precomputeFormattingInfos(const FormatToken *Token);
345
346 /// \brief Apply the special formatting that the given role demands.
347 ///
348 /// Continues formatting from \p State leaving indentation to \p Indenter and
349 /// returns the total penalty that this formatting incurs.
350 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
351 bool DryRun) {
352 return 0;
353 }
354
355 /// \brief Notifies the \c Role that a comma was found.
356 virtual void CommaFound(const FormatToken *Token) {}
357
358protected:
359 const FormatStyle &Style;
360};
361
362class CommaSeparatedList : public TokenRole {
363public:
364 CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {}
365
366 virtual void precomputeFormattingInfos(const FormatToken *Token);
367
368 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
369 bool DryRun);
370
371 /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
372 virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
373
374private:
375 /// \brief A struct that holds information on how to format a given list with
376 /// a specific number of columns.
377 struct ColumnFormat {
378 /// \brief The number of columns to use.
379 unsigned Columns;
380
381 /// \brief The total width in characters.
382 unsigned TotalWidth;
383
384 /// \brief The number of lines required for this format.
385 unsigned LineCount;
386
387 /// \brief The size of each column in characters.
388 SmallVector<unsigned, 8> ColumnSizes;
389 };
390
391 /// \brief Calculate which \c ColumnFormat fits best into
392 /// \p RemainingCharacters.
393 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
394
395 /// \brief The ordered \c FormatTokens making up the commas of this list.
396 SmallVector<const FormatToken *, 8> Commas;
397
398 /// \brief The length of each of the list's items in characters including the
399 /// trailing comma.
400 SmallVector<unsigned, 8> ItemLengths;
401
402 /// \brief Precomputed formats that can be used for this list.
403 SmallVector<ColumnFormat, 4> Formats;
404};
405
Alexander Kornienko3b711552013-06-03 16:45:03 +0000406} // namespace format
407} // namespace clang
408
409#endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H