blob: 12e7eac80edf9d8aed512083ce228d0e0d738333 [file] [log] [blame]
//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
Nico Weberee0feec2013-02-05 16:21:00 +000023static bool isUnaryOperator(const AnnotatedToken &Tok) {
24 switch (Tok.FormatTok.Tok.getKind()) {
25 case tok::plus:
26 case tok::plusplus:
27 case tok::minus:
28 case tok::minusminus:
29 case tok::exclaim:
30 case tok::tilde:
31 case tok::kw_sizeof:
32 case tok::kw_alignof:
33 return true;
34 default:
35 return false;
36 }
37}
38
Daniel Jasper32d28ee2013-01-29 21:01:14 +000039static bool isBinaryOperator(const AnnotatedToken &Tok) {
40 // Comma is a binary operator, but does not behave as such wrt. formatting.
41 return getPrecedence(Tok) > prec::Comma;
42}
43
Daniel Jasper01786732013-02-04 07:21:18 +000044// Returns the previous token ignoring comments.
45static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
46 const AnnotatedToken *PrevToken = Tok.Parent;
47 while (PrevToken != NULL && PrevToken->is(tok::comment))
48 PrevToken = PrevToken->Parent;
49 return PrevToken;
50}
51
52// Returns the next token ignoring comments.
53static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
54 if (Tok.Children.empty())
55 return NULL;
56 const AnnotatedToken *NextToken = &Tok.Children[0];
57 while (NextToken->is(tok::comment)) {
58 if (NextToken->Children.empty())
59 return NULL;
60 NextToken = &NextToken->Children[0];
61 }
62 return NextToken;
63}
64
Daniel Jasper32d28ee2013-01-29 21:01:14 +000065/// \brief A parser that gathers additional information about tokens.
66///
67/// The \c TokenAnnotator tries to matches parenthesis and square brakets and
68/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
69/// into template parameter lists.
70class AnnotatingParser {
71public:
Daniel Jasper01786732013-02-04 07:21:18 +000072 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line)
73 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
Daniel Jasper4e778092013-02-06 10:05:46 +000074 KeywordVirtualFound(false) {
75 Contexts.push_back(Context(1, /*IsExpression=*/ false));
76 Contexts.back().LookForFunctionName = Line.MustBeDeclaration;
Daniel Jasper32d28ee2013-01-29 21:01:14 +000077 }
78
Daniel Jasper32d28ee2013-01-29 21:01:14 +000079 bool parseAngle() {
80 if (CurrentToken == NULL)
81 return false;
Daniel Jasper4e778092013-02-06 10:05:46 +000082 ScopedContextCreator ContextCreator(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +000083 AnnotatedToken *Left = CurrentToken->Parent;
Daniel Jasper4e778092013-02-06 10:05:46 +000084 Contexts.back().IsExpression = false;
Daniel Jasper32d28ee2013-01-29 21:01:14 +000085 while (CurrentToken != NULL) {
86 if (CurrentToken->is(tok::greater)) {
87 Left->MatchingParen = CurrentToken;
88 CurrentToken->MatchingParen = Left;
89 CurrentToken->Type = TT_TemplateCloser;
90 next();
91 return true;
92 }
93 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
94 CurrentToken->is(tok::r_brace))
95 return false;
96 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
97 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
98 return false;
99 if (CurrentToken->is(tok::comma))
100 ++Left->ParameterCount;
101 if (!consumeToken())
102 return false;
103 }
104 return false;
105 }
106
107 bool parseParens(bool LookForDecls = false) {
108 if (CurrentToken == NULL)
109 return false;
Daniel Jasper4e778092013-02-06 10:05:46 +0000110 ScopedContextCreator ContextCreator(*this, 1);
111
112 // FIXME: This is a bit of a hack. Do better.
113 Contexts.back().ColonIsForRangeExpr =
114 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
115
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000116 bool StartsObjCMethodExpr = false;
117 AnnotatedToken *Left = CurrentToken->Parent;
118 if (CurrentToken->is(tok::caret)) {
119 // ^( starts a block.
120 Left->Type = TT_ObjCBlockLParen;
121 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
122 // @selector( starts a selector.
123 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
124 MaybeSel->Parent->is(tok::at)) {
125 StartsObjCMethodExpr = true;
126 }
127 }
128
Daniel Jasper4e778092013-02-06 10:05:46 +0000129 if (StartsObjCMethodExpr) {
130 Contexts.back().ColonIsObjCMethodExpr = true;
131 Left->Type = TT_ObjCMethodExpr;
132 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000133
134 while (CurrentToken != NULL) {
135 // LookForDecls is set when "if (" has been seen. Check for
136 // 'identifier' '*' 'identifier' followed by not '=' -- this
137 // '*' has to be a binary operator but determineStarAmpUsage() will
138 // categorize it as an unary operator, so set the right type here.
139 if (LookForDecls && !CurrentToken->Children.empty()) {
140 AnnotatedToken &Prev = *CurrentToken->Parent;
141 AnnotatedToken &Next = CurrentToken->Children[0];
142 if (Prev.Parent->is(tok::identifier) &&
143 (Prev.is(tok::star) || Prev.is(tok::amp)) &&
144 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
145 Prev.Type = TT_BinaryOperator;
146 LookForDecls = false;
147 }
148 }
149
150 if (CurrentToken->is(tok::r_paren)) {
151 Left->MatchingParen = CurrentToken;
152 CurrentToken->MatchingParen = Left;
153
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000154 if (StartsObjCMethodExpr) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000155 CurrentToken->Type = TT_ObjCMethodExpr;
156 if (Contexts.back().FirstObjCSelectorName != NULL) {
157 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
158 Contexts.back().LongestObjCSelectorName;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000159 }
160 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000161
162 next();
163 return true;
164 }
165 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
166 return false;
167 if (CurrentToken->is(tok::comma))
168 ++Left->ParameterCount;
169 if (!consumeToken())
170 return false;
171 }
172 return false;
173 }
174
175 bool parseSquare() {
176 if (!CurrentToken)
177 return false;
Daniel Jasper4e778092013-02-06 10:05:46 +0000178 ScopedContextCreator ContextCreator(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000179
180 // A '[' could be an index subscript (after an indentifier or after
181 // ')' or ']'), or it could be the start of an Objective-C method
182 // expression.
183 AnnotatedToken *Left = CurrentToken->Parent;
184 bool StartsObjCMethodExpr =
185 !Left->Parent || Left->Parent->is(tok::colon) ||
186 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
187 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
Nico Weberee0feec2013-02-05 16:21:00 +0000188 isUnaryOperator(*Left->Parent) ||
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000189 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
190 prec::Unknown;
191
Daniel Jasper4e778092013-02-06 10:05:46 +0000192 if (StartsObjCMethodExpr) {
193 Contexts.back().ColonIsObjCMethodExpr = true;
194 Left->Type = TT_ObjCMethodExpr;
195 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000196
197 while (CurrentToken != NULL) {
198 if (CurrentToken->is(tok::r_square)) {
199 if (!CurrentToken->Children.empty() &&
200 CurrentToken->Children[0].is(tok::l_paren)) {
Nico Webere8a97982013-02-06 06:20:11 +0000201 // An ObjC method call is rarely followed by an open parenthesis.
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000202 // FIXME: Do we incorrectly label ":" with this?
203 StartsObjCMethodExpr = false;
204 Left->Type = TT_Unknown;
205 }
Daniel Jasper01786732013-02-04 07:21:18 +0000206 if (StartsObjCMethodExpr) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000207 CurrentToken->Type = TT_ObjCMethodExpr;
Nico Webere8a97982013-02-06 06:20:11 +0000208 // determineStarAmpUsage() thinks that '*' '[' is allocating an
209 // array of pointers, but if '[' starts a selector then '*' is a
210 // binary operator.
Daniel Jasper01786732013-02-04 07:21:18 +0000211 if (Left->Parent != NULL &&
Nico Webere8a97982013-02-06 06:20:11 +0000212 (Left->Parent->is(tok::star) || Left->Parent->is(tok::amp)) &&
213 Left->Parent->Type == TT_PointerOrReference)
Daniel Jasper01786732013-02-04 07:21:18 +0000214 Left->Parent->Type = TT_BinaryOperator;
215 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000216 Left->MatchingParen = CurrentToken;
217 CurrentToken->MatchingParen = Left;
Daniel Jasper4e778092013-02-06 10:05:46 +0000218 if (Contexts.back().FirstObjCSelectorName != NULL)
219 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
220 Contexts.back().LongestObjCSelectorName;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000221 next();
222 return true;
223 }
224 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
225 return false;
226 if (CurrentToken->is(tok::comma))
227 ++Left->ParameterCount;
228 if (!consumeToken())
229 return false;
230 }
231 return false;
232 }
233
234 bool parseBrace() {
235 // Lines are fine to end with '{'.
236 if (CurrentToken == NULL)
237 return true;
Daniel Jasper4e778092013-02-06 10:05:46 +0000238 ScopedContextCreator ContextCreator(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000239 AnnotatedToken *Left = CurrentToken->Parent;
240 while (CurrentToken != NULL) {
241 if (CurrentToken->is(tok::r_brace)) {
242 Left->MatchingParen = CurrentToken;
243 CurrentToken->MatchingParen = Left;
244 next();
245 return true;
246 }
247 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
248 return false;
Daniel Jasperf343cab2013-01-31 14:59:26 +0000249 if (CurrentToken->is(tok::comma))
250 ++Left->ParameterCount;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000251 if (!consumeToken())
252 return false;
253 }
254 return true;
255 }
256
257 bool parseConditional() {
258 while (CurrentToken != NULL) {
259 if (CurrentToken->is(tok::colon)) {
260 CurrentToken->Type = TT_ConditionalExpr;
261 next();
262 return true;
263 }
264 if (!consumeToken())
265 return false;
266 }
267 return false;
268 }
269
270 bool parseTemplateDeclaration() {
271 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
272 CurrentToken->Type = TT_TemplateOpener;
273 next();
274 if (!parseAngle())
275 return false;
276 CurrentToken->Parent->ClosesTemplateDeclaration = true;
277 return true;
278 }
279 return false;
280 }
281
282 bool consumeToken() {
283 AnnotatedToken *Tok = CurrentToken;
284 next();
285 switch (Tok->FormatTok.Tok.getKind()) {
286 case tok::plus:
287 case tok::minus:
288 // At the start of the line, +/- specific ObjectiveC method
289 // declarations.
290 if (Tok->Parent == NULL)
291 Tok->Type = TT_ObjCMethodSpecifier;
292 break;
293 case tok::colon:
294 // Colons from ?: are handled in parseConditional().
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000295 if (Tok->Parent->is(tok::r_paren)) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000296 Tok->Type = TT_CtorInitializerColon;
Daniel Jasper4e778092013-02-06 10:05:46 +0000297 } else if (Contexts.back().ColonIsObjCMethodExpr ||
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000298 Line.First.Type == TT_ObjCMethodSpecifier) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000299 Tok->Type = TT_ObjCMethodExpr;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000300 Tok->Parent->Type = TT_ObjCSelectorName;
Daniel Jasper4e778092013-02-06 10:05:46 +0000301 if (Tok->Parent->FormatTok.TokenLength >
302 Contexts.back().LongestObjCSelectorName)
303 Contexts.back().LongestObjCSelectorName =
304 Tok->Parent->FormatTok.TokenLength;
305 if (Contexts.back().FirstObjCSelectorName == NULL)
306 Contexts.back().FirstObjCSelectorName = Tok->Parent;
307 } else if (Contexts.back().ColonIsForRangeExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000308 Tok->Type = TT_RangeBasedForLoopColon;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000309 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000310 break;
311 case tok::kw_if:
312 case tok::kw_while:
313 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
314 next();
315 if (!parseParens(/*LookForDecls=*/ true))
316 return false;
317 }
318 break;
319 case tok::kw_for:
Daniel Jasper4e778092013-02-06 10:05:46 +0000320 Contexts.back().ColonIsForRangeExpr = true;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000321 next();
322 if (!parseParens())
323 return false;
324 break;
325 case tok::l_paren:
326 if (!parseParens())
327 return false;
328 break;
329 case tok::l_square:
330 if (!parseSquare())
331 return false;
332 break;
333 case tok::l_brace:
334 if (!parseBrace())
335 return false;
336 break;
337 case tok::less:
338 if (parseAngle())
339 Tok->Type = TT_TemplateOpener;
340 else {
341 Tok->Type = TT_BinaryOperator;
342 CurrentToken = Tok;
343 next();
344 }
345 break;
346 case tok::r_paren:
347 case tok::r_square:
348 return false;
349 case tok::r_brace:
350 // Lines can start with '}'.
351 if (Tok->Parent != NULL)
352 return false;
353 break;
354 case tok::greater:
355 Tok->Type = TT_BinaryOperator;
356 break;
357 case tok::kw_operator:
358 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
359 CurrentToken->Type = TT_OverloadedOperator;
360 next();
361 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
362 CurrentToken->Type = TT_OverloadedOperator;
363 next();
364 }
365 } else {
366 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
367 CurrentToken->Type = TT_OverloadedOperator;
368 next();
369 }
370 }
371 break;
372 case tok::question:
373 parseConditional();
374 break;
375 case tok::kw_template:
376 parseTemplateDeclaration();
377 break;
378 default:
379 break;
380 }
381 return true;
382 }
383
384 void parseIncludeDirective() {
385 next();
386 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
387 next();
388 while (CurrentToken != NULL) {
389 if (CurrentToken->isNot(tok::comment) ||
390 !CurrentToken->Children.empty())
391 CurrentToken->Type = TT_ImplicitStringLiteral;
392 next();
393 }
394 } else {
395 while (CurrentToken != NULL) {
396 next();
397 }
398 }
399 }
400
401 void parseWarningOrError() {
402 next();
403 // We still want to format the whitespace left of the first token of the
404 // warning or error.
405 next();
406 while (CurrentToken != NULL) {
407 CurrentToken->Type = TT_ImplicitStringLiteral;
408 next();
409 }
410 }
411
412 void parsePreprocessorDirective() {
413 next();
414 if (CurrentToken == NULL)
415 return;
416 // Hashes in the middle of a line can lead to any strange token
417 // sequence.
418 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
419 return;
420 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
421 case tok::pp_include:
422 case tok::pp_import:
423 parseIncludeDirective();
424 break;
425 case tok::pp_error:
426 case tok::pp_warning:
427 parseWarningOrError();
428 break;
429 default:
430 break;
431 }
Daniel Jasper5b7e7b02013-02-05 09:34:14 +0000432 while (CurrentToken != NULL)
433 next();
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000434 }
435
436 LineType parseLine() {
437 int PeriodsAndArrows = 0;
438 bool CanBeBuilderTypeStmt = true;
439 if (CurrentToken->is(tok::hash)) {
440 parsePreprocessorDirective();
441 return LT_PreprocessorDirective;
442 }
443 while (CurrentToken != NULL) {
444 if (CurrentToken->is(tok::kw_virtual))
445 KeywordVirtualFound = true;
446 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
447 ++PeriodsAndArrows;
448 if (getPrecedence(*CurrentToken) > prec::Assignment &&
449 CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
450 CanBeBuilderTypeStmt = false;
451 if (!consumeToken())
452 return LT_Invalid;
453 }
454 if (KeywordVirtualFound)
455 return LT_VirtualFunctionDecl;
456
457 // Assume a builder-type call if there are 2 or more "." and "->".
458 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
459 return LT_BuilderTypeCall;
460
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000461 if (Line.First.Type == TT_ObjCMethodSpecifier) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000462 if (Contexts.back().FirstObjCSelectorName != NULL)
463 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
464 Contexts.back().LongestObjCSelectorName;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000465 return LT_ObjCMethodDecl;
466 }
467
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000468 return LT_Other;
469 }
470
471 void next() {
Daniel Jasper01786732013-02-04 07:21:18 +0000472 if (CurrentToken != NULL) {
473 determineTokenType(*CurrentToken);
Daniel Jasper4e778092013-02-06 10:05:46 +0000474 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
Daniel Jasper01786732013-02-04 07:21:18 +0000475 }
476
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000477 if (CurrentToken != NULL && !CurrentToken->Children.empty())
478 CurrentToken = &CurrentToken->Children[0];
479 else
480 CurrentToken = NULL;
481 }
482
483private:
Daniel Jasper4e778092013-02-06 10:05:46 +0000484 /// \brief A struct to hold information valid in a specific context, e.g.
485 /// a pair of parenthesis.
486 struct Context {
487 Context(unsigned BindingStrength, bool IsExpression)
488 : BindingStrength(BindingStrength), LongestObjCSelectorName(0),
489 ColonIsForRangeExpr(false), ColonIsObjCMethodExpr(false),
490 FirstObjCSelectorName(NULL), IsExpression(IsExpression),
491 LookForFunctionName(false) {
492 }
Daniel Jasper01786732013-02-04 07:21:18 +0000493
Daniel Jasper4e778092013-02-06 10:05:46 +0000494 unsigned BindingStrength;
495 unsigned LongestObjCSelectorName;
496 bool ColonIsForRangeExpr;
497 bool ColonIsObjCMethodExpr;
498 AnnotatedToken *FirstObjCSelectorName;
499 bool IsExpression;
500 bool LookForFunctionName;
501 };
502
503 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
504 /// of each instance.
505 struct ScopedContextCreator {
506 AnnotatingParser &P;
507
508 ScopedContextCreator(AnnotatingParser &P, unsigned Increase)
509 : P(P) {
510 P.Contexts.push_back(Context(
511 P.Contexts.back().BindingStrength + Increase,
512 P.Contexts.back().IsExpression));
513 }
514
515 ~ScopedContextCreator() { P.Contexts.pop_back(); }
516 };
Daniel Jasper01786732013-02-04 07:21:18 +0000517
518 void determineTokenType(AnnotatedToken &Current) {
519 if (getPrecedence(Current) == prec::Assignment) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000520 Contexts.back().IsExpression = true;
Daniel Jasper01786732013-02-04 07:21:18 +0000521 AnnotatedToken *Previous = Current.Parent;
Daniel Jasper6b5ba8b2013-02-06 10:57:42 +0000522 while (Previous != NULL && Previous->isNot(tok::comma)) {
Daniel Jasper01786732013-02-04 07:21:18 +0000523 if (Previous->Type == TT_BinaryOperator &&
524 (Previous->is(tok::star) || Previous->is(tok::amp))) {
525 Previous->Type = TT_PointerOrReference;
526 }
527 Previous = Previous->Parent;
528 }
529 }
530 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
531 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
532 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
Daniel Jasper4e778092013-02-06 10:05:46 +0000533 Contexts.back().IsExpression = true;
Daniel Jasper01786732013-02-04 07:21:18 +0000534
535 if (Current.Type == TT_Unknown) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000536 if (Contexts.back().LookForFunctionName && Current.is(tok::l_paren)) {
Daniel Jasper01786732013-02-04 07:21:18 +0000537 findFunctionName(&Current);
Daniel Jasper4e778092013-02-06 10:05:46 +0000538 Contexts.back().LookForFunctionName = false;
Daniel Jasper01786732013-02-04 07:21:18 +0000539 } else if (Current.is(tok::star) || Current.is(tok::amp)) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000540 Current.Type =
541 determineStarAmpUsage(Current, Contexts.back().IsExpression);
Daniel Jasper01786732013-02-04 07:21:18 +0000542 } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
543 Current.is(tok::caret)) {
544 Current.Type = determinePlusMinusCaretUsage(Current);
545 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
546 Current.Type = determineIncrementUsage(Current);
547 } else if (Current.is(tok::exclaim)) {
548 Current.Type = TT_UnaryOperator;
549 } else if (isBinaryOperator(Current)) {
550 Current.Type = TT_BinaryOperator;
551 } else if (Current.is(tok::comment)) {
552 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
553 Lex.getLangOpts()));
554 if (StringRef(Data).startswith("//"))
555 Current.Type = TT_LineComment;
556 else
557 Current.Type = TT_BlockComment;
558 } else if (Current.is(tok::r_paren) &&
559 (Current.Parent->Type == TT_PointerOrReference ||
560 Current.Parent->Type == TT_TemplateCloser) &&
561 (Current.Children.empty() ||
562 (Current.Children[0].isNot(tok::equal) &&
563 Current.Children[0].isNot(tok::semi) &&
564 Current.Children[0].isNot(tok::l_brace)))) {
565 // FIXME: We need to get smarter and understand more cases of casts.
566 Current.Type = TT_CastRParen;
567 } else if (Current.is(tok::at) && Current.Children.size()) {
568 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
569 case tok::objc_interface:
570 case tok::objc_implementation:
571 case tok::objc_protocol:
572 Current.Type = TT_ObjCDecl;
573 break;
574 case tok::objc_property:
575 Current.Type = TT_ObjCProperty;
576 break;
577 default:
578 break;
579 }
580 }
581 }
582 }
583
584 /// \brief Starting from \p Current, this searches backwards for an
585 /// identifier which could be the start of a function name and marks it.
586 void findFunctionName(AnnotatedToken *Current) {
587 AnnotatedToken *Parent = Current->Parent;
588 while (Parent != NULL && Parent->Parent != NULL) {
589 if (Parent->is(tok::identifier) &&
590 (Parent->Parent->is(tok::identifier) ||
591 Parent->Parent->Type == TT_PointerOrReference ||
592 Parent->Parent->Type == TT_TemplateCloser)) {
593 Parent->Type = TT_StartOfName;
594 break;
595 }
596 Parent = Parent->Parent;
597 }
598 }
599
600 /// \brief Return the type of the given token assuming it is * or &.
601 TokenType
602 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
603 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
604 if (PrevToken == NULL)
605 return TT_UnaryOperator;
606
607 const AnnotatedToken *NextToken = getNextToken(Tok);
608 if (NextToken == NULL)
609 return TT_Unknown;
610
Daniel Jasper01786732013-02-04 07:21:18 +0000611 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
612 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
613 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
Nico Webere8a97982013-02-06 06:20:11 +0000614 PrevToken->is(tok::equal) || PrevToken->Type == TT_BinaryOperator ||
Daniel Jasper01786732013-02-04 07:21:18 +0000615 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
616 return TT_UnaryOperator;
617
Nico Webere8a97982013-02-06 06:20:11 +0000618 if (NextToken->is(tok::l_square))
619 return TT_PointerOrReference;
620
Daniel Jasper01786732013-02-04 07:21:18 +0000621 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
622 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
Nico Weberee0feec2013-02-05 16:21:00 +0000623 isUnaryOperator(*NextToken) || NextToken->is(tok::l_paren) ||
624 NextToken->is(tok::l_square))
Daniel Jasper01786732013-02-04 07:21:18 +0000625 return TT_BinaryOperator;
626
627 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
628 NextToken->is(tok::greater))
629 return TT_PointerOrReference;
630
631 // It is very unlikely that we are going to find a pointer or reference type
632 // definition on the RHS of an assignment.
633 if (IsExpression)
634 return TT_BinaryOperator;
635
636 return TT_PointerOrReference;
637 }
638
639 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
640 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
641 if (PrevToken == NULL)
642 return TT_UnaryOperator;
643
644 // Use heuristics to recognize unary operators.
645 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
646 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
647 PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
648 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
649 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
650 return TT_UnaryOperator;
651
Nico Weberee0feec2013-02-05 16:21:00 +0000652 // There can't be two consecutive binary operators.
Daniel Jasper01786732013-02-04 07:21:18 +0000653 if (PrevToken->Type == TT_BinaryOperator)
654 return TT_UnaryOperator;
655
656 // Fall back to marking the token as binary operator.
657 return TT_BinaryOperator;
658 }
659
660 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
661 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
662 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
663 if (PrevToken == NULL)
664 return TT_UnaryOperator;
665 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
666 PrevToken->is(tok::identifier))
667 return TT_TrailingUnaryOperator;
668
669 return TT_UnaryOperator;
670 }
Daniel Jasper4e778092013-02-06 10:05:46 +0000671
672 SmallVector<Context, 8> Contexts;
673
674 SourceManager &SourceMgr;
675 Lexer &Lex;
676 AnnotatedLine &Line;
677 AnnotatedToken *CurrentToken;
678 bool KeywordVirtualFound;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000679};
680
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000681void TokenAnnotator::annotate(AnnotatedLine &Line) {
Daniel Jasper01786732013-02-04 07:21:18 +0000682 AnnotatingParser Parser(SourceMgr, Lex, Line);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000683 Line.Type = Parser.parseLine();
684 if (Line.Type == LT_Invalid)
685 return;
686
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000687 if (Line.First.Type == TT_ObjCMethodSpecifier)
688 Line.Type = LT_ObjCMethodDecl;
689 else if (Line.First.Type == TT_ObjCDecl)
690 Line.Type = LT_ObjCDecl;
691 else if (Line.First.Type == TT_ObjCProperty)
692 Line.Type = LT_ObjCProperty;
693
694 Line.First.SpaceRequiredBefore = true;
695 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
696 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
697
698 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000699}
700
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000701void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
702 if (Line.First.Children.empty())
703 return;
704 AnnotatedToken *Current = &Line.First.Children[0];
705 while (Current != NULL) {
706 Current->SpaceRequiredBefore = spaceRequiredBefore(Line, *Current);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000707
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000708 if (Current->FormatTok.MustBreakBefore) {
709 Current->MustBreakBefore = true;
710 } else if (Current->Type == TT_LineComment) {
711 Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
712 } else if ((Current->Parent->is(tok::comment) &&
713 Current->FormatTok.NewlinesBefore > 0) ||
714 (Current->is(tok::string_literal) &&
715 Current->Parent->is(tok::string_literal))) {
716 Current->MustBreakBefore = true;
717 } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
718 Current->Parent->is(tok::string_literal) &&
719 Current->Children[0].is(tok::string_literal)) {
720 Current->MustBreakBefore = true;
721 } else {
722 Current->MustBreakBefore = false;
723 }
724 Current->CanBreakBefore =
725 Current->MustBreakBefore || canBreakBefore(Line, *Current);
726 if (Current->MustBreakBefore)
727 Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
728 else
729 Current->TotalLength =
730 Current->Parent->TotalLength + Current->FormatTok.TokenLength +
731 (Current->SpaceRequiredBefore ? 1 : 0);
732 // FIXME: Only calculate this if CanBreakBefore is true once static
733 // initializers etc. are sorted out.
734 // FIXME: Move magic numbers to a better place.
735 Current->SplitPenalty =
736 20 * Current->BindingStrength + splitPenalty(Line, *Current);
737
738 Current = Current->Children.empty() ? NULL : &Current->Children[0];
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000739 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000740}
741
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000742unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
743 const AnnotatedToken &Tok) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000744 const AnnotatedToken &Left = *Tok.Parent;
745 const AnnotatedToken &Right = Tok;
746
747 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
748 return 50;
749 if (Left.is(tok::equal) && Right.is(tok::l_brace))
750 return 150;
751 if (Left.is(tok::coloncolon))
752 return 500;
753
754 if (Left.Type == TT_RangeBasedForLoopColon)
755 return 5;
756
757 if (Right.is(tok::arrow) || Right.is(tok::period)) {
758 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
759 return 5; // Should be smaller than breaking at a nested comma.
760 return 150;
761 }
762
763 // In for-loops, prefer breaking at ',' and ';'.
764 if (Line.First.is(tok::kw_for) &&
765 (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
766 return 20;
767
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000768 if (Left.is(tok::semi))
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000769 return 0;
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000770 if (Left.is(tok::comma))
771 return 1;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000772
773 // In Objective-C method expressions, prefer breaking before "param:" over
774 // breaking after it.
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000775 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000776 return 0;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000777 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000778 return 20;
779
Daniel Jasper01786732013-02-04 07:21:18 +0000780 if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
781 Left.Type == TT_TemplateOpener)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000782 return 20;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000783
Daniel Jasper01786732013-02-04 07:21:18 +0000784 if (Right.is(tok::lessless))
785 return prec::Shift;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000786 if (Left.Type == TT_ConditionalExpr)
787 return prec::Assignment;
788 prec::Level Level = getPrecedence(Left);
789
790 if (Level != prec::Unknown)
791 return Level;
792
793 return 3;
794}
795
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000796bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
797 const AnnotatedToken &Left,
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000798 const AnnotatedToken &Right) {
799 if (Right.is(tok::hashhash))
800 return Left.is(tok::hash);
801 if (Left.is(tok::hashhash) || Left.is(tok::hash))
802 return Right.is(tok::hash);
803 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
804 return false;
805 if (Right.is(tok::less) &&
806 (Left.is(tok::kw_template) ||
807 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
808 return true;
809 if (Left.is(tok::arrow) || Right.is(tok::arrow))
810 return false;
811 if (Left.is(tok::exclaim) || Left.is(tok::tilde))
812 return false;
813 if (Left.is(tok::at) &&
814 (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
815 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
816 Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
817 Right.is(tok::kw_true) || Right.is(tok::kw_false)))
818 return false;
819 if (Left.is(tok::coloncolon))
820 return false;
821 if (Right.is(tok::coloncolon))
822 return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
823 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
824 return false;
825 if (Right.is(tok::amp) || Right.is(tok::star))
826 return Left.FormatTok.Tok.isLiteral() ||
827 (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000828 !Style.PointerBindsToType);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000829 if (Left.is(tok::amp) || Left.is(tok::star))
830 return Right.FormatTok.Tok.isLiteral() ||
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000831 Style.PointerBindsToType;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000832 if (Right.is(tok::star) && Left.is(tok::l_paren))
833 return false;
834 if (Left.is(tok::l_square) || Right.is(tok::r_square))
835 return false;
836 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
837 return false;
838 if (Left.is(tok::period) || Right.is(tok::period))
839 return false;
840 if (Left.is(tok::colon))
841 return Left.Type != TT_ObjCMethodExpr;
842 if (Right.is(tok::colon))
843 return Right.Type != TT_ObjCMethodExpr;
844 if (Left.is(tok::l_paren))
845 return false;
846 if (Right.is(tok::l_paren)) {
847 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
848 Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
849 Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
850 Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
851 Left.is(tok::kw_delete);
852 }
853 if (Left.is(tok::at) &&
854 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
855 return false;
856 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
857 return false;
858 return true;
859}
860
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000861bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
862 const AnnotatedToken &Tok) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000863 if (Line.Type == LT_ObjCMethodDecl) {
864 if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
865 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
866 return true;
867 if (Tok.is(tok::colon))
868 return false;
869 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
870 return true;
871 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
872 // Don't space between ')' and <id>
873 return false;
874 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
875 // Don't space between ':' and '('
876 return false;
877 }
878 if (Line.Type == LT_ObjCProperty &&
879 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
880 return false;
881
882 if (Tok.Parent->is(tok::comma))
883 return true;
884 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
885 return true;
886 if (Tok.Type == TT_OverloadedOperator)
887 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
888 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
889 if (Tok.Parent->Type == TT_OverloadedOperator)
890 return false;
891 if (Tok.is(tok::colon))
892 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
893 Tok.Type != TT_ObjCMethodExpr;
894 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
895 return false;
896 if (Tok.Type == TT_UnaryOperator)
897 return Tok.Parent->isNot(tok::l_paren) &&
898 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
899 (Tok.Parent->isNot(tok::colon) ||
900 Tok.Parent->Type != TT_ObjCMethodExpr);
901 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
902 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000903 TT_TemplateCloser && Style.Standard != FormatStyle::LS_Cpp11;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000904 }
905 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
906 return true;
907 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
908 return false;
909 if (Tok.is(tok::less) && Line.First.is(tok::hash))
910 return true;
911 if (Tok.Type == TT_TrailingUnaryOperator)
912 return false;
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000913 return spaceRequiredBetween(Line, *Tok.Parent, Tok);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000914}
915
Daniel Jasper8ff690a2013-02-06 14:22:40 +0000916bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
917 const AnnotatedToken &Right) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000918 const AnnotatedToken &Left = *Right.Parent;
919 if (Line.Type == LT_ObjCMethodDecl) {
920 if (Right.is(tok::identifier) && !Right.Children.empty() &&
921 Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
922 return true;
923 if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
924 Left.Parent->is(tok::colon))
925 // Don't break this identifier as ':' or identifier
926 // before it will break.
927 return false;
928 if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
929 // Don't break at ':' if identifier before it can beak.
930 return false;
931 }
932 if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
933 return true;
934 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
935 return false;
936 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
937 return true;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000938 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000939 return true;
940 if (Left.ClosesTemplateDeclaration)
941 return true;
942 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
943 return true;
944 if (Left.Type == TT_RangeBasedForLoopColon)
945 return true;
946 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
947 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
948 Left.is(tok::question))
949 return false;
950 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
951 return false;
952
953 if (Right.Type == TT_LineComment)
954 // We rely on MustBreakBefore being set correctly here as we should not
955 // change the "binding" behavior of a comment.
956 return false;
957
958 // Allow breaking after a trailing 'const', e.g. after a method declaration,
959 // unless it is follow by ';', '{' or '='.
960 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
961 Left.Parent->is(tok::r_paren))
962 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
963 Right.isNot(tok::equal);
964
965 // We only break before r_brace if there was a corresponding break before
966 // the l_brace, which is tracked by BreakBeforeClosingBrace.
967 if (Right.is(tok::r_brace))
968 return false;
969
970 if (Right.is(tok::r_paren) || Right.is(tok::greater))
971 return false;
972 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
973 Left.is(tok::comma) || Right.is(tok::lessless) ||
974 Right.is(tok::arrow) || Right.is(tok::period) ||
975 Right.is(tok::colon) || Left.is(tok::coloncolon) ||
976 Left.is(tok::semi) || Left.is(tok::l_brace) ||
977 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
978 Right.is(tok::identifier)) ||
979 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
980 (Left.is(tok::l_square) && !Right.is(tok::r_square));
981}
982
983} // namespace format
984} // namespace clang