//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
17#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
18
19#include "clang/Basic/OperatorPrecedence.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000020#include "clang/Format/Format.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000021#include "clang/Lex/Lexer.h"
Daniel Jasperd4a03db2013-08-22 15:00:41 +000022#include "llvm/ADT/OwningPtr.h"
Alexander Kornienko3b711552013-06-03 16:45:03 +000023
24namespace clang {
25namespace format {
26
/// \brief Formatting-specific classification of a token.
///
/// Stored in \c FormatToken::Type. \c TT_Unknown is the value a freshly
/// constructed \c FormatToken starts with. The enumerators carry no explicit
/// values, so their numeric values follow declaration order.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_CtorInitializerComma,
  TT_DesignatedInitializerPeriod,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_FunctionTypeLParen,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCDictLiteral,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperator,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingReturnArrow,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown
};
62
/// \brief Represents what type of block a set of braces opens.
///
/// Stored in \c FormatToken::BlockKind; \c BK_Unknown is the value a freshly
/// constructed \c FormatToken starts with.
enum BraceBlockKind {
  BK_Unknown,
  BK_Block,
  BK_BracedInit
};
69
/// \brief The packing kind of a function's parameters.
///
/// Stored in \c FormatToken::PackingKind; \c PPK_Inconclusive is the value a
/// freshly constructed \c FormatToken starts with.
enum ParameterPackingKind {
  PPK_BinPacked,
  PPK_OnePerLine,
  PPK_Inconclusive
};
76
Daniel Jasperd4a03db2013-08-22 15:00:41 +000077class TokenRole;
78
Alexander Kornienko3b711552013-06-03 16:45:03 +000079/// \brief A wrapper around a \c Token storing information about the
80/// whitespace characters preceeding it.
81struct FormatToken {
82 FormatToken()
83 : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
Alexander Kornienko54e6c9d2013-06-07 17:45:07 +000084 CodePointCount(0), IsFirst(false), MustBreakBefore(false),
Daniel Jasper561211d2013-07-16 20:28:33 +000085 IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown),
86 SpacesRequiredBefore(0), CanBreakBefore(false),
87 ClosesTemplateDeclaration(false), ParameterCount(0),
88 PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
89 BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
90 FakeRParens(0), LastInChainOfCalls(false),
Alexander Kornienko3b711552013-06-03 16:45:03 +000091 PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
92 Next(NULL) {}
93
94 /// \brief The \c Token.
95 Token Tok;
96
97 /// \brief The number of newlines immediately before the \c Token.
98 ///
99 /// This can be used to determine what the user wrote in the original code
100 /// and thereby e.g. leave an empty line between two function definitions.
101 unsigned NewlinesBefore;
102
103 /// \brief Whether there is at least one unescaped newline before the \c
104 /// Token.
105 bool HasUnescapedNewline;
106
107 /// \brief The range of the whitespace immediately preceeding the \c Token.
108 SourceRange WhitespaceRange;
109
110 /// \brief The offset just past the last '\n' in this token's leading
111 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
112 unsigned LastNewlineOffset;
113
Alexander Kornienko00895102013-06-05 14:09:10 +0000114 /// \brief The length of the non-whitespace parts of the token in CodePoints.
115 /// We need this to correctly measure number of columns a token spans.
116 unsigned CodePointCount;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000117
118 /// \brief Indicates that this is the first token.
119 bool IsFirst;
120
121 /// \brief Whether there must be a line break before this token.
122 ///
123 /// This happens for example when a preprocessor directive ended directly
124 /// before the token.
125 bool MustBreakBefore;
126
127 /// \brief Returns actual token start location without leading escaped
128 /// newlines and whitespace.
129 ///
130 /// This can be different to Tok.getLocation(), which includes leading escaped
131 /// newlines.
132 SourceLocation getStartOfNonWhitespace() const {
133 return WhitespaceRange.getEnd();
134 }
135
136 /// \brief The raw text of the token.
137 ///
138 /// Contains the raw token text without leading whitespace and without leading
139 /// escaped newlines.
140 StringRef TokenText;
141
Daniel Jasper561211d2013-07-16 20:28:33 +0000142 /// \brief Set to \c true if this token is an unterminated literal.
143 bool IsUnterminatedLiteral;
144
Daniel Jasper0de1c4d2013-07-09 09:06:29 +0000145 /// \brief Contains the kind of block if this token is a brace.
146 BraceBlockKind BlockKind;
147
Alexander Kornienko3b711552013-06-03 16:45:03 +0000148 TokenType Type;
149
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000150 /// \brief The number of spaces that should be inserted before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000151 unsigned SpacesRequiredBefore;
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000152
153 /// \brief \c true if it is allowed to break before this token.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000154 bool CanBreakBefore;
155
156 bool ClosesTemplateDeclaration;
157
158 /// \brief Number of parameters, if this is "(", "[" or "<".
159 ///
160 /// This is initialized to 1 as we don't need to distinguish functions with
161 /// 0 parameters from functions with 1 parameter. Thus, we can simply count
162 /// the number of commas.
163 unsigned ParameterCount;
164
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000165 /// \brief A token can have a special role that can carry extra information
166 /// about the token's formatting.
167 llvm::OwningPtr<TokenRole> Role;
168
Daniel Jasperc7bd68f2013-07-10 14:02:49 +0000169 /// \brief If this is an opening parenthesis, how are the parameters packed?
170 ParameterPackingKind PackingKind;
171
Alexander Kornienko3b711552013-06-03 16:45:03 +0000172 /// \brief The total length of the line up to and including this token.
173 unsigned TotalLength;
174
175 /// \brief The length of following tokens until the next natural split point,
176 /// or the next token that can be broken.
177 unsigned UnbreakableTailLength;
178
179 // FIXME: Come up with a 'cleaner' concept.
180 /// \brief The binding strength of a token. This is a combined value of
181 /// operator precedence, parenthesis nesting, etc.
182 unsigned BindingStrength;
183
184 /// \brief Penalty for inserting a line break before this token.
185 unsigned SplitPenalty;
186
187 /// \brief If this is the first ObjC selector name in an ObjC method
188 /// definition or call, this contains the length of the longest name.
189 unsigned LongestObjCSelectorName;
190
191 /// \brief Stores the number of required fake parentheses and the
192 /// corresponding operator precedence.
193 ///
194 /// If multiple fake parentheses start at a token, this vector stores them in
195 /// reverse order, i.e. inner fake parenthesis first.
196 SmallVector<prec::Level, 4> FakeLParens;
197 /// \brief Insert this many fake ) after this token for correct indentation.
198 unsigned FakeRParens;
199
200 /// \brief Is this the last "." or "->" in a builder-type call?
201 bool LastInChainOfCalls;
202
203 /// \brief Is this token part of a \c DeclStmt defining multiple variables?
204 ///
205 /// Only set if \c Type == \c TT_StartOfName.
206 bool PartOfMultiVariableDeclStmt;
207
208 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
209
210 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
211 return is(K1) || is(K2);
212 }
213
214 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
215 return is(K1) || is(K2) || is(K3);
216 }
217
218 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
219 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
220 tok::TokenKind K6 = tok::NUM_TOKENS,
221 tok::TokenKind K7 = tok::NUM_TOKENS,
222 tok::TokenKind K8 = tok::NUM_TOKENS,
223 tok::TokenKind K9 = tok::NUM_TOKENS,
224 tok::TokenKind K10 = tok::NUM_TOKENS,
225 tok::TokenKind K11 = tok::NUM_TOKENS,
226 tok::TokenKind K12 = tok::NUM_TOKENS) const {
227 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
228 is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
229 }
230
231 bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); }
232
233 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
234 return Tok.isObjCAtKeyword(Kind);
235 }
236
237 bool isAccessSpecifier(bool ColonRequired = true) const {
238 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
239 (!ColonRequired || (Next && Next->is(tok::colon)));
240 }
241
242 bool isObjCAccessSpecifier() const {
243 return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
244 Next->isObjCAtKeyword(tok::objc_protected) ||
245 Next->isObjCAtKeyword(tok::objc_package) ||
246 Next->isObjCAtKeyword(tok::objc_private));
247 }
248
249 /// \brief Returns whether \p Tok is ([{ or a template opening <.
250 bool opensScope() const {
251 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
252 Type == TT_TemplateOpener;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000253 }
Nico Weber0e5a8882013-06-25 19:25:12 +0000254 /// \brief Returns whether \p Tok is )]} or a template closing >.
Alexander Kornienko3b711552013-06-03 16:45:03 +0000255 bool closesScope() const {
256 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
257 Type == TT_TemplateCloser;
258 }
259
260 bool isUnaryOperator() const {
261 switch (Tok.getKind()) {
262 case tok::plus:
263 case tok::plusplus:
264 case tok::minus:
265 case tok::minusminus:
266 case tok::exclaim:
267 case tok::tilde:
268 case tok::kw_sizeof:
269 case tok::kw_alignof:
270 return true;
271 default:
272 return false;
273 }
274 }
275 bool isBinaryOperator() const {
276 // Comma is a binary operator, but does not behave as such wrt. formatting.
277 return getPrecedence() > prec::Comma;
278 }
279 bool isTrailingComment() const {
280 return is(tok::comment) && (!Next || Next->NewlinesBefore > 0);
281 }
282
283 prec::Level getPrecedence() const {
284 return getBinOpPrecedence(Tok.getKind(), true, true);
285 }
286
287 /// \brief Returns the previous token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000288 FormatToken *getPreviousNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000289 FormatToken *Tok = Previous;
290 while (Tok != NULL && Tok->is(tok::comment))
291 Tok = Tok->Previous;
292 return Tok;
293 }
294
295 /// \brief Returns the next token ignoring comments.
Alexander Kornienko0bdc6432013-07-04 14:47:51 +0000296 const FormatToken *getNextNonComment() const {
Alexander Kornienko3b711552013-06-03 16:45:03 +0000297 const FormatToken *Tok = Next;
298 while (Tok != NULL && Tok->is(tok::comment))
299 Tok = Tok->Next;
300 return Tok;
301 }
302
303 FormatToken *MatchingParen;
304
305 FormatToken *Previous;
306 FormatToken *Next;
307
308private:
309 // Disallow copying.
Craig Topper53d4f312013-07-01 04:07:34 +0000310 FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION;
311 void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
Alexander Kornienko3b711552013-06-03 16:45:03 +0000312};
313
Daniel Jasperd4a03db2013-08-22 15:00:41 +0000314class ContinuationIndenter;
315struct LineState;
316
317class TokenRole {
318public:
319 TokenRole(const FormatStyle &Style) : Style(Style) {}
320 virtual ~TokenRole();
321
322 /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
323 /// this function precomputes required information for formatting.
324 virtual void precomputeFormattingInfos(const FormatToken *Token);
325
326 /// \brief Apply the special formatting that the given role demands.
327 ///
328 /// Continues formatting from \p State leaving indentation to \p Indenter and
329 /// returns the total penalty that this formatting incurs.
330 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
331 bool DryRun) {
332 return 0;
333 }
334
335 /// \brief Notifies the \c Role that a comma was found.
336 virtual void CommaFound(const FormatToken *Token) {}
337
338protected:
339 const FormatStyle &Style;
340};
341
342class CommaSeparatedList : public TokenRole {
343public:
344 CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {}
345
346 virtual void precomputeFormattingInfos(const FormatToken *Token);
347
348 virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
349 bool DryRun);
350
351 /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
352 virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
353
354private:
355 /// \brief A struct that holds information on how to format a given list with
356 /// a specific number of columns.
357 struct ColumnFormat {
358 /// \brief The number of columns to use.
359 unsigned Columns;
360
361 /// \brief The total width in characters.
362 unsigned TotalWidth;
363
364 /// \brief The number of lines required for this format.
365 unsigned LineCount;
366
367 /// \brief The size of each column in characters.
368 SmallVector<unsigned, 8> ColumnSizes;
369 };
370
371 /// \brief Calculate which \c ColumnFormat fits best into
372 /// \p RemainingCharacters.
373 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
374
375 /// \brief The ordered \c FormatTokens making up the commas of this list.
376 SmallVector<const FormatToken *, 8> Commas;
377
378 /// \brief The length of each of the list's items in characters including the
379 /// trailing comma.
380 SmallVector<unsigned, 8> ItemLengths;
381
382 /// \brief Precomputed formats that can be used for this list.
383 SmallVector<ColumnFormat, 4> Formats;
384};
385
Alexander Kornienko3b711552013-06-03 16:45:03 +0000386} // namespace format
387} // namespace clang
388
389#endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H