blob: e1f6de3903496de7244c262e0b5b0420f26f4c08 [file] [log] [blame]
Daniel Jasper32d28ee2013-01-29 21:01:14 +00001//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
Nico Weberee0feec2013-02-05 16:21:00 +000023static bool isUnaryOperator(const AnnotatedToken &Tok) {
24 switch (Tok.FormatTok.Tok.getKind()) {
25 case tok::plus:
26 case tok::plusplus:
27 case tok::minus:
28 case tok::minusminus:
29 case tok::exclaim:
30 case tok::tilde:
31 case tok::kw_sizeof:
32 case tok::kw_alignof:
33 return true;
34 default:
35 return false;
36 }
37}
38
Daniel Jasper32d28ee2013-01-29 21:01:14 +000039static bool isBinaryOperator(const AnnotatedToken &Tok) {
40 // Comma is a binary operator, but does not behave as such wrt. formatting.
41 return getPrecedence(Tok) > prec::Comma;
42}
43
Daniel Jasper01786732013-02-04 07:21:18 +000044// Returns the previous token ignoring comments.
45static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
46 const AnnotatedToken *PrevToken = Tok.Parent;
47 while (PrevToken != NULL && PrevToken->is(tok::comment))
48 PrevToken = PrevToken->Parent;
49 return PrevToken;
50}
51
52// Returns the next token ignoring comments.
53static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
54 if (Tok.Children.empty())
55 return NULL;
56 const AnnotatedToken *NextToken = &Tok.Children[0];
57 while (NextToken->is(tok::comment)) {
58 if (NextToken->Children.empty())
59 return NULL;
60 NextToken = &NextToken->Children[0];
61 }
62 return NextToken;
63}
64
/// \brief A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// to store parenthesis levels. It also tries to resolve matching "<" and ">"
/// into template parameter lists.
70class AnnotatingParser {
71public:
Daniel Jasper01786732013-02-04 07:21:18 +000072 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line)
73 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
74 KeywordVirtualFound(false), ColonIsObjCMethodExpr(false),
Daniel Jasper63d7ced2013-02-05 10:07:47 +000075 LongestObjCSelectorName(0), FirstObjCSelectorName(NULL),
Daniel Jasper01786732013-02-04 07:21:18 +000076 ColonIsForRangeExpr(false), IsExpression(false),
77 LookForFunctionName(Line.MustBeDeclaration), BindingStrength(1) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +000078 }
79
80 /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
81 struct ObjCSelectorRAII {
82 AnnotatingParser &P;
83 bool ColonWasObjCMethodExpr;
84
85 ObjCSelectorRAII(AnnotatingParser &P)
86 : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {
87 }
88
89 ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
90
91 void markStart(AnnotatedToken &Left) {
92 P.ColonIsObjCMethodExpr = true;
Daniel Jasper63d7ced2013-02-05 10:07:47 +000093 P.LongestObjCSelectorName = 0;
94 P.FirstObjCSelectorName = NULL;
Daniel Jasper32d28ee2013-01-29 21:01:14 +000095 Left.Type = TT_ObjCMethodExpr;
96 }
97
98 void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
99 };
100
Daniel Jasper01786732013-02-04 07:21:18 +0000101 struct ScopedBindingStrengthIncrease {
102 AnnotatingParser &P;
103 unsigned Increase;
104
105 ScopedBindingStrengthIncrease(AnnotatingParser &P, unsigned Increase)
106 : P(P), Increase(Increase) {
107 P.BindingStrength += Increase;
108 }
109
110 ~ScopedBindingStrengthIncrease() { P.BindingStrength -= Increase; }
111 };
112
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000113 bool parseAngle() {
114 if (CurrentToken == NULL)
115 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000116 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000117 AnnotatedToken *Left = CurrentToken->Parent;
118 while (CurrentToken != NULL) {
119 if (CurrentToken->is(tok::greater)) {
120 Left->MatchingParen = CurrentToken;
121 CurrentToken->MatchingParen = Left;
122 CurrentToken->Type = TT_TemplateCloser;
123 next();
124 return true;
125 }
126 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
127 CurrentToken->is(tok::r_brace))
128 return false;
129 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
130 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
131 return false;
132 if (CurrentToken->is(tok::comma))
133 ++Left->ParameterCount;
134 if (!consumeToken())
135 return false;
136 }
137 return false;
138 }
139
140 bool parseParens(bool LookForDecls = false) {
141 if (CurrentToken == NULL)
142 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000143 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000144 bool StartsObjCMethodExpr = false;
145 AnnotatedToken *Left = CurrentToken->Parent;
146 if (CurrentToken->is(tok::caret)) {
147 // ^( starts a block.
148 Left->Type = TT_ObjCBlockLParen;
149 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
150 // @selector( starts a selector.
151 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
152 MaybeSel->Parent->is(tok::at)) {
153 StartsObjCMethodExpr = true;
154 }
155 }
156
157 ObjCSelectorRAII objCSelector(*this);
158 if (StartsObjCMethodExpr)
159 objCSelector.markStart(*Left);
160
161 while (CurrentToken != NULL) {
162 // LookForDecls is set when "if (" has been seen. Check for
163 // 'identifier' '*' 'identifier' followed by not '=' -- this
164 // '*' has to be a binary operator but determineStarAmpUsage() will
165 // categorize it as an unary operator, so set the right type here.
166 if (LookForDecls && !CurrentToken->Children.empty()) {
167 AnnotatedToken &Prev = *CurrentToken->Parent;
168 AnnotatedToken &Next = CurrentToken->Children[0];
169 if (Prev.Parent->is(tok::identifier) &&
170 (Prev.is(tok::star) || Prev.is(tok::amp)) &&
171 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
172 Prev.Type = TT_BinaryOperator;
173 LookForDecls = false;
174 }
175 }
176
177 if (CurrentToken->is(tok::r_paren)) {
178 Left->MatchingParen = CurrentToken;
179 CurrentToken->MatchingParen = Left;
180
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000181 if (StartsObjCMethodExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000182 objCSelector.markEnd(*CurrentToken);
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000183 if (FirstObjCSelectorName != NULL) {
184 FirstObjCSelectorName->LongestObjCSelectorName =
185 LongestObjCSelectorName;
186 }
187 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000188
189 next();
190 return true;
191 }
192 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
193 return false;
194 if (CurrentToken->is(tok::comma))
195 ++Left->ParameterCount;
196 if (!consumeToken())
197 return false;
198 }
199 return false;
200 }
201
202 bool parseSquare() {
203 if (!CurrentToken)
204 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000205 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000206
207 // A '[' could be an index subscript (after an indentifier or after
208 // ')' or ']'), or it could be the start of an Objective-C method
209 // expression.
210 AnnotatedToken *Left = CurrentToken->Parent;
211 bool StartsObjCMethodExpr =
212 !Left->Parent || Left->Parent->is(tok::colon) ||
213 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
214 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
Nico Weberee0feec2013-02-05 16:21:00 +0000215 isUnaryOperator(*Left->Parent) ||
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000216 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
217 prec::Unknown;
218
219 ObjCSelectorRAII objCSelector(*this);
220 if (StartsObjCMethodExpr)
221 objCSelector.markStart(*Left);
222
223 while (CurrentToken != NULL) {
224 if (CurrentToken->is(tok::r_square)) {
225 if (!CurrentToken->Children.empty() &&
226 CurrentToken->Children[0].is(tok::l_paren)) {
Nico Webere8a97982013-02-06 06:20:11 +0000227 // An ObjC method call is rarely followed by an open parenthesis.
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000228 // FIXME: Do we incorrectly label ":" with this?
229 StartsObjCMethodExpr = false;
230 Left->Type = TT_Unknown;
231 }
Daniel Jasper01786732013-02-04 07:21:18 +0000232 if (StartsObjCMethodExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000233 objCSelector.markEnd(*CurrentToken);
Nico Webere8a97982013-02-06 06:20:11 +0000234 // determineStarAmpUsage() thinks that '*' '[' is allocating an
235 // array of pointers, but if '[' starts a selector then '*' is a
236 // binary operator.
Daniel Jasper01786732013-02-04 07:21:18 +0000237 if (Left->Parent != NULL &&
Nico Webere8a97982013-02-06 06:20:11 +0000238 (Left->Parent->is(tok::star) || Left->Parent->is(tok::amp)) &&
239 Left->Parent->Type == TT_PointerOrReference)
Daniel Jasper01786732013-02-04 07:21:18 +0000240 Left->Parent->Type = TT_BinaryOperator;
241 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000242 Left->MatchingParen = CurrentToken;
243 CurrentToken->MatchingParen = Left;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000244 if (FirstObjCSelectorName != NULL)
245 FirstObjCSelectorName->LongestObjCSelectorName =
246 LongestObjCSelectorName;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000247 next();
248 return true;
249 }
250 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
251 return false;
252 if (CurrentToken->is(tok::comma))
253 ++Left->ParameterCount;
254 if (!consumeToken())
255 return false;
256 }
257 return false;
258 }
259
260 bool parseBrace() {
261 // Lines are fine to end with '{'.
262 if (CurrentToken == NULL)
263 return true;
Daniel Jasper01786732013-02-04 07:21:18 +0000264 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000265 AnnotatedToken *Left = CurrentToken->Parent;
266 while (CurrentToken != NULL) {
267 if (CurrentToken->is(tok::r_brace)) {
268 Left->MatchingParen = CurrentToken;
269 CurrentToken->MatchingParen = Left;
270 next();
271 return true;
272 }
273 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
274 return false;
Daniel Jasperf343cab2013-01-31 14:59:26 +0000275 if (CurrentToken->is(tok::comma))
276 ++Left->ParameterCount;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000277 if (!consumeToken())
278 return false;
279 }
280 return true;
281 }
282
283 bool parseConditional() {
284 while (CurrentToken != NULL) {
285 if (CurrentToken->is(tok::colon)) {
286 CurrentToken->Type = TT_ConditionalExpr;
287 next();
288 return true;
289 }
290 if (!consumeToken())
291 return false;
292 }
293 return false;
294 }
295
296 bool parseTemplateDeclaration() {
297 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
298 CurrentToken->Type = TT_TemplateOpener;
299 next();
300 if (!parseAngle())
301 return false;
302 CurrentToken->Parent->ClosesTemplateDeclaration = true;
303 return true;
304 }
305 return false;
306 }
307
308 bool consumeToken() {
309 AnnotatedToken *Tok = CurrentToken;
310 next();
311 switch (Tok->FormatTok.Tok.getKind()) {
312 case tok::plus:
313 case tok::minus:
314 // At the start of the line, +/- specific ObjectiveC method
315 // declarations.
316 if (Tok->Parent == NULL)
317 Tok->Type = TT_ObjCMethodSpecifier;
318 break;
319 case tok::colon:
320 // Colons from ?: are handled in parseConditional().
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000321 if (Tok->Parent->is(tok::r_paren)) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000322 Tok->Type = TT_CtorInitializerColon;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000323 } else if (ColonIsObjCMethodExpr ||
324 Line.First.Type == TT_ObjCMethodSpecifier) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000325 Tok->Type = TT_ObjCMethodExpr;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000326 Tok->Parent->Type = TT_ObjCSelectorName;
327 if (Tok->Parent->FormatTok.TokenLength > LongestObjCSelectorName)
328 LongestObjCSelectorName = Tok->Parent->FormatTok.TokenLength;
329 if (FirstObjCSelectorName == NULL)
330 FirstObjCSelectorName = Tok->Parent;
331 } else if (ColonIsForRangeExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000332 Tok->Type = TT_RangeBasedForLoopColon;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000333 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000334 break;
335 case tok::kw_if:
336 case tok::kw_while:
337 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
338 next();
339 if (!parseParens(/*LookForDecls=*/ true))
340 return false;
341 }
342 break;
343 case tok::kw_for:
344 ColonIsForRangeExpr = true;
345 next();
346 if (!parseParens())
347 return false;
348 break;
349 case tok::l_paren:
350 if (!parseParens())
351 return false;
352 break;
353 case tok::l_square:
354 if (!parseSquare())
355 return false;
356 break;
357 case tok::l_brace:
358 if (!parseBrace())
359 return false;
360 break;
361 case tok::less:
362 if (parseAngle())
363 Tok->Type = TT_TemplateOpener;
364 else {
365 Tok->Type = TT_BinaryOperator;
366 CurrentToken = Tok;
367 next();
368 }
369 break;
370 case tok::r_paren:
371 case tok::r_square:
372 return false;
373 case tok::r_brace:
374 // Lines can start with '}'.
375 if (Tok->Parent != NULL)
376 return false;
377 break;
378 case tok::greater:
379 Tok->Type = TT_BinaryOperator;
380 break;
381 case tok::kw_operator:
382 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
383 CurrentToken->Type = TT_OverloadedOperator;
384 next();
385 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
386 CurrentToken->Type = TT_OverloadedOperator;
387 next();
388 }
389 } else {
390 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
391 CurrentToken->Type = TT_OverloadedOperator;
392 next();
393 }
394 }
395 break;
396 case tok::question:
397 parseConditional();
398 break;
399 case tok::kw_template:
400 parseTemplateDeclaration();
401 break;
402 default:
403 break;
404 }
405 return true;
406 }
407
408 void parseIncludeDirective() {
409 next();
410 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
411 next();
412 while (CurrentToken != NULL) {
413 if (CurrentToken->isNot(tok::comment) ||
414 !CurrentToken->Children.empty())
415 CurrentToken->Type = TT_ImplicitStringLiteral;
416 next();
417 }
418 } else {
419 while (CurrentToken != NULL) {
420 next();
421 }
422 }
423 }
424
425 void parseWarningOrError() {
426 next();
427 // We still want to format the whitespace left of the first token of the
428 // warning or error.
429 next();
430 while (CurrentToken != NULL) {
431 CurrentToken->Type = TT_ImplicitStringLiteral;
432 next();
433 }
434 }
435
436 void parsePreprocessorDirective() {
437 next();
438 if (CurrentToken == NULL)
439 return;
440 // Hashes in the middle of a line can lead to any strange token
441 // sequence.
442 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
443 return;
444 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
445 case tok::pp_include:
446 case tok::pp_import:
447 parseIncludeDirective();
448 break;
449 case tok::pp_error:
450 case tok::pp_warning:
451 parseWarningOrError();
452 break;
453 default:
454 break;
455 }
Daniel Jasper5b7e7b02013-02-05 09:34:14 +0000456 while (CurrentToken != NULL)
457 next();
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000458 }
459
460 LineType parseLine() {
461 int PeriodsAndArrows = 0;
462 bool CanBeBuilderTypeStmt = true;
463 if (CurrentToken->is(tok::hash)) {
464 parsePreprocessorDirective();
465 return LT_PreprocessorDirective;
466 }
467 while (CurrentToken != NULL) {
468 if (CurrentToken->is(tok::kw_virtual))
469 KeywordVirtualFound = true;
470 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
471 ++PeriodsAndArrows;
472 if (getPrecedence(*CurrentToken) > prec::Assignment &&
473 CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
474 CanBeBuilderTypeStmt = false;
475 if (!consumeToken())
476 return LT_Invalid;
477 }
478 if (KeywordVirtualFound)
479 return LT_VirtualFunctionDecl;
480
481 // Assume a builder-type call if there are 2 or more "." and "->".
482 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
483 return LT_BuilderTypeCall;
484
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000485 if (Line.First.Type == TT_ObjCMethodSpecifier) {
486 if (FirstObjCSelectorName != NULL)
487 FirstObjCSelectorName->LongestObjCSelectorName =
488 LongestObjCSelectorName;
489 return LT_ObjCMethodDecl;
490 }
491
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000492 return LT_Other;
493 }
494
495 void next() {
Daniel Jasper01786732013-02-04 07:21:18 +0000496 if (CurrentToken != NULL) {
497 determineTokenType(*CurrentToken);
498 CurrentToken->BindingStrength = BindingStrength;
499 }
500
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000501 if (CurrentToken != NULL && !CurrentToken->Children.empty())
502 CurrentToken = &CurrentToken->Children[0];
503 else
504 CurrentToken = NULL;
505 }
506
507private:
Daniel Jasper01786732013-02-04 07:21:18 +0000508 SourceManager &SourceMgr;
509 Lexer &Lex;
510 AnnotatedLine &Line;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000511 AnnotatedToken *CurrentToken;
512 bool KeywordVirtualFound;
513 bool ColonIsObjCMethodExpr;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000514 unsigned LongestObjCSelectorName;
515 AnnotatedToken *FirstObjCSelectorName;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000516 bool ColonIsForRangeExpr;
Daniel Jasper01786732013-02-04 07:21:18 +0000517 bool IsExpression;
518 bool LookForFunctionName;
519
520 unsigned BindingStrength;
521
522 void determineTokenType(AnnotatedToken &Current) {
523 if (getPrecedence(Current) == prec::Assignment) {
524 IsExpression = true;
525 AnnotatedToken *Previous = Current.Parent;
526 while (Previous != NULL) {
527 if (Previous->Type == TT_BinaryOperator &&
528 (Previous->is(tok::star) || Previous->is(tok::amp))) {
529 Previous->Type = TT_PointerOrReference;
530 }
531 Previous = Previous->Parent;
532 }
533 }
534 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
535 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
536 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
537 IsExpression = true;
538
539 if (Current.Type == TT_Unknown) {
540 if (LookForFunctionName && Current.is(tok::l_paren)) {
541 findFunctionName(&Current);
542 LookForFunctionName = false;
543 } else if (Current.is(tok::star) || Current.is(tok::amp)) {
544 Current.Type = determineStarAmpUsage(Current, IsExpression);
545 } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
546 Current.is(tok::caret)) {
547 Current.Type = determinePlusMinusCaretUsage(Current);
548 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
549 Current.Type = determineIncrementUsage(Current);
550 } else if (Current.is(tok::exclaim)) {
551 Current.Type = TT_UnaryOperator;
552 } else if (isBinaryOperator(Current)) {
553 Current.Type = TT_BinaryOperator;
554 } else if (Current.is(tok::comment)) {
555 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
556 Lex.getLangOpts()));
557 if (StringRef(Data).startswith("//"))
558 Current.Type = TT_LineComment;
559 else
560 Current.Type = TT_BlockComment;
561 } else if (Current.is(tok::r_paren) &&
562 (Current.Parent->Type == TT_PointerOrReference ||
563 Current.Parent->Type == TT_TemplateCloser) &&
564 (Current.Children.empty() ||
565 (Current.Children[0].isNot(tok::equal) &&
566 Current.Children[0].isNot(tok::semi) &&
567 Current.Children[0].isNot(tok::l_brace)))) {
568 // FIXME: We need to get smarter and understand more cases of casts.
569 Current.Type = TT_CastRParen;
570 } else if (Current.is(tok::at) && Current.Children.size()) {
571 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
572 case tok::objc_interface:
573 case tok::objc_implementation:
574 case tok::objc_protocol:
575 Current.Type = TT_ObjCDecl;
576 break;
577 case tok::objc_property:
578 Current.Type = TT_ObjCProperty;
579 break;
580 default:
581 break;
582 }
583 }
584 }
585 }
586
587 /// \brief Starting from \p Current, this searches backwards for an
588 /// identifier which could be the start of a function name and marks it.
589 void findFunctionName(AnnotatedToken *Current) {
590 AnnotatedToken *Parent = Current->Parent;
591 while (Parent != NULL && Parent->Parent != NULL) {
592 if (Parent->is(tok::identifier) &&
593 (Parent->Parent->is(tok::identifier) ||
594 Parent->Parent->Type == TT_PointerOrReference ||
595 Parent->Parent->Type == TT_TemplateCloser)) {
596 Parent->Type = TT_StartOfName;
597 break;
598 }
599 Parent = Parent->Parent;
600 }
601 }
602
603 /// \brief Return the type of the given token assuming it is * or &.
604 TokenType
605 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
606 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
607 if (PrevToken == NULL)
608 return TT_UnaryOperator;
609
610 const AnnotatedToken *NextToken = getNextToken(Tok);
611 if (NextToken == NULL)
612 return TT_Unknown;
613
Daniel Jasper01786732013-02-04 07:21:18 +0000614 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
615 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
616 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
Nico Webere8a97982013-02-06 06:20:11 +0000617 PrevToken->is(tok::equal) || PrevToken->Type == TT_BinaryOperator ||
Daniel Jasper01786732013-02-04 07:21:18 +0000618 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
619 return TT_UnaryOperator;
620
Nico Webere8a97982013-02-06 06:20:11 +0000621 if (NextToken->is(tok::l_square))
622 return TT_PointerOrReference;
623
Daniel Jasper01786732013-02-04 07:21:18 +0000624 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
625 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
Nico Weberee0feec2013-02-05 16:21:00 +0000626 isUnaryOperator(*NextToken) || NextToken->is(tok::l_paren) ||
627 NextToken->is(tok::l_square))
Daniel Jasper01786732013-02-04 07:21:18 +0000628 return TT_BinaryOperator;
629
630 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
631 NextToken->is(tok::greater))
632 return TT_PointerOrReference;
633
634 // It is very unlikely that we are going to find a pointer or reference type
635 // definition on the RHS of an assignment.
636 if (IsExpression)
637 return TT_BinaryOperator;
638
639 return TT_PointerOrReference;
640 }
641
642 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
643 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
644 if (PrevToken == NULL)
645 return TT_UnaryOperator;
646
647 // Use heuristics to recognize unary operators.
648 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
649 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
650 PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
651 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
652 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
653 return TT_UnaryOperator;
654
Nico Weberee0feec2013-02-05 16:21:00 +0000655 // There can't be two consecutive binary operators.
Daniel Jasper01786732013-02-04 07:21:18 +0000656 if (PrevToken->Type == TT_BinaryOperator)
657 return TT_UnaryOperator;
658
659 // Fall back to marking the token as binary operator.
660 return TT_BinaryOperator;
661 }
662
663 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
664 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
665 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
666 if (PrevToken == NULL)
667 return TT_UnaryOperator;
668 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
669 PrevToken->is(tok::identifier))
670 return TT_TrailingUnaryOperator;
671
672 return TT_UnaryOperator;
673 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000674};
675
676void TokenAnnotator::annotate() {
Daniel Jasper01786732013-02-04 07:21:18 +0000677 AnnotatingParser Parser(SourceMgr, Lex, Line);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000678 Line.Type = Parser.parseLine();
679 if (Line.Type == LT_Invalid)
680 return;
681
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000682 if (Line.First.Type == TT_ObjCMethodSpecifier)
683 Line.Type = LT_ObjCMethodDecl;
684 else if (Line.First.Type == TT_ObjCDecl)
685 Line.Type = LT_ObjCDecl;
686 else if (Line.First.Type == TT_ObjCProperty)
687 Line.Type = LT_ObjCProperty;
688
689 Line.First.SpaceRequiredBefore = true;
690 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
691 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
692
693 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
694 if (!Line.First.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000695 calculateFormattingInformation(Line.First.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000696}
697
Daniel Jasper01786732013-02-04 07:21:18 +0000698void TokenAnnotator::calculateFormattingInformation(AnnotatedToken &Current) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000699 Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
700
701 if (Current.FormatTok.MustBreakBefore) {
702 Current.MustBreakBefore = true;
Daniel Jasper2752ff32013-02-04 07:32:14 +0000703 } else if (Current.Type == TT_LineComment) {
704 Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
705 } else if ((Current.Parent->is(tok::comment) &&
706 Current.FormatTok.NewlinesBefore > 0) ||
707 (Current.is(tok::string_literal) &&
708 Current.Parent->is(tok::string_literal))) {
709 Current.MustBreakBefore = true;
Daniel Jasperfa543ac2013-02-04 07:34:48 +0000710 } else if (Current.is(tok::lessless) && !Current.Children.empty() &&
711 Current.Parent->is(tok::string_literal) &&
712 Current.Children[0].is(tok::string_literal)) {
713 Current.MustBreakBefore = true;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000714 } else {
Daniel Jasper2752ff32013-02-04 07:32:14 +0000715 Current.MustBreakBefore = false;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000716 }
717 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
718 if (Current.MustBreakBefore)
719 Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
720 else
721 Current.TotalLength =
722 Current.Parent->TotalLength + Current.FormatTok.TokenLength +
723 (Current.SpaceRequiredBefore ? 1 : 0);
724 // FIXME: Only calculate this if CanBreakBefore is true once static
725 // initializers etc. are sorted out.
Daniel Jasper01786732013-02-04 07:21:18 +0000726 // FIXME: Move magic numbers to a better place.
727 Current.SplitPenalty = 20 * Current.BindingStrength + splitPenalty(Current);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000728 if (!Current.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000729 calculateFormattingInformation(Current.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000730}
731
732unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
733 const AnnotatedToken &Left = *Tok.Parent;
734 const AnnotatedToken &Right = Tok;
735
736 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
737 return 50;
738 if (Left.is(tok::equal) && Right.is(tok::l_brace))
739 return 150;
740 if (Left.is(tok::coloncolon))
741 return 500;
742
743 if (Left.Type == TT_RangeBasedForLoopColon)
744 return 5;
745
746 if (Right.is(tok::arrow) || Right.is(tok::period)) {
747 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
748 return 5; // Should be smaller than breaking at a nested comma.
749 return 150;
750 }
751
752 // In for-loops, prefer breaking at ',' and ';'.
753 if (Line.First.is(tok::kw_for) &&
754 (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
755 return 20;
756
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000757 if (Left.is(tok::semi))
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000758 return 0;
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000759 if (Left.is(tok::comma))
760 return 1;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000761
762 // In Objective-C method expressions, prefer breaking before "param:" over
763 // breaking after it.
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000764 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000765 return 0;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000766 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000767 return 20;
768
Daniel Jasper01786732013-02-04 07:21:18 +0000769 if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
770 Left.Type == TT_TemplateOpener)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000771 return 20;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000772
Daniel Jasper01786732013-02-04 07:21:18 +0000773 if (Right.is(tok::lessless))
774 return prec::Shift;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000775 if (Left.Type == TT_ConditionalExpr)
776 return prec::Assignment;
777 prec::Level Level = getPrecedence(Left);
778
779 if (Level != prec::Unknown)
780 return Level;
781
782 return 3;
783}
784
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000785bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
786 const AnnotatedToken &Right) {
787 if (Right.is(tok::hashhash))
788 return Left.is(tok::hash);
789 if (Left.is(tok::hashhash) || Left.is(tok::hash))
790 return Right.is(tok::hash);
791 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
792 return false;
793 if (Right.is(tok::less) &&
794 (Left.is(tok::kw_template) ||
795 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
796 return true;
797 if (Left.is(tok::arrow) || Right.is(tok::arrow))
798 return false;
799 if (Left.is(tok::exclaim) || Left.is(tok::tilde))
800 return false;
801 if (Left.is(tok::at) &&
802 (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
803 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
804 Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
805 Right.is(tok::kw_true) || Right.is(tok::kw_false)))
806 return false;
807 if (Left.is(tok::coloncolon))
808 return false;
809 if (Right.is(tok::coloncolon))
810 return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
811 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
812 return false;
813 if (Right.is(tok::amp) || Right.is(tok::star))
814 return Left.FormatTok.Tok.isLiteral() ||
815 (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
816 !Style.PointerAndReferenceBindToType);
817 if (Left.is(tok::amp) || Left.is(tok::star))
818 return Right.FormatTok.Tok.isLiteral() ||
819 Style.PointerAndReferenceBindToType;
820 if (Right.is(tok::star) && Left.is(tok::l_paren))
821 return false;
822 if (Left.is(tok::l_square) || Right.is(tok::r_square))
823 return false;
824 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
825 return false;
826 if (Left.is(tok::period) || Right.is(tok::period))
827 return false;
828 if (Left.is(tok::colon))
829 return Left.Type != TT_ObjCMethodExpr;
830 if (Right.is(tok::colon))
831 return Right.Type != TT_ObjCMethodExpr;
832 if (Left.is(tok::l_paren))
833 return false;
834 if (Right.is(tok::l_paren)) {
835 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
836 Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
837 Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
838 Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
839 Left.is(tok::kw_delete);
840 }
841 if (Left.is(tok::at) &&
842 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
843 return false;
844 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
845 return false;
846 return true;
847}
848
849bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
850 if (Line.Type == LT_ObjCMethodDecl) {
851 if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
852 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
853 return true;
854 if (Tok.is(tok::colon))
855 return false;
856 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
857 return true;
858 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
859 // Don't space between ')' and <id>
860 return false;
861 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
862 // Don't space between ':' and '('
863 return false;
864 }
865 if (Line.Type == LT_ObjCProperty &&
866 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
867 return false;
868
869 if (Tok.Parent->is(tok::comma))
870 return true;
871 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
872 return true;
873 if (Tok.Type == TT_OverloadedOperator)
874 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
875 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
876 if (Tok.Parent->Type == TT_OverloadedOperator)
877 return false;
878 if (Tok.is(tok::colon))
879 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
880 Tok.Type != TT_ObjCMethodExpr;
881 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
882 return false;
883 if (Tok.Type == TT_UnaryOperator)
884 return Tok.Parent->isNot(tok::l_paren) &&
885 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
886 (Tok.Parent->isNot(tok::colon) ||
887 Tok.Parent->Type != TT_ObjCMethodExpr);
888 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
889 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
890 TT_TemplateCloser && Style.SplitTemplateClosingGreater;
891 }
892 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
893 return true;
894 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
895 return false;
896 if (Tok.is(tok::less) && Line.First.is(tok::hash))
897 return true;
898 if (Tok.Type == TT_TrailingUnaryOperator)
899 return false;
900 return spaceRequiredBetween(*Tok.Parent, Tok);
901}
902
903bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
904 const AnnotatedToken &Left = *Right.Parent;
905 if (Line.Type == LT_ObjCMethodDecl) {
906 if (Right.is(tok::identifier) && !Right.Children.empty() &&
907 Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
908 return true;
909 if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
910 Left.Parent->is(tok::colon))
911 // Don't break this identifier as ':' or identifier
912 // before it will break.
913 return false;
914 if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
915 // Don't break at ':' if identifier before it can beak.
916 return false;
917 }
918 if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
919 return true;
920 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
921 return false;
922 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
923 return true;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000924 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000925 return true;
926 if (Left.ClosesTemplateDeclaration)
927 return true;
928 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
929 return true;
930 if (Left.Type == TT_RangeBasedForLoopColon)
931 return true;
932 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
933 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
934 Left.is(tok::question))
935 return false;
936 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
937 return false;
938
939 if (Right.Type == TT_LineComment)
940 // We rely on MustBreakBefore being set correctly here as we should not
941 // change the "binding" behavior of a comment.
942 return false;
943
944 // Allow breaking after a trailing 'const', e.g. after a method declaration,
945 // unless it is follow by ';', '{' or '='.
946 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
947 Left.Parent->is(tok::r_paren))
948 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
949 Right.isNot(tok::equal);
950
951 // We only break before r_brace if there was a corresponding break before
952 // the l_brace, which is tracked by BreakBeforeClosingBrace.
953 if (Right.is(tok::r_brace))
954 return false;
955
956 if (Right.is(tok::r_paren) || Right.is(tok::greater))
957 return false;
958 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
959 Left.is(tok::comma) || Right.is(tok::lessless) ||
960 Right.is(tok::arrow) || Right.is(tok::period) ||
961 Right.is(tok::colon) || Left.is(tok::coloncolon) ||
962 Left.is(tok::semi) || Left.is(tok::l_brace) ||
963 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
964 Right.is(tok::identifier)) ||
965 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
966 (Left.is(tok::l_square) && !Right.is(tok::r_square));
967}
968
969} // namespace format
970} // namespace clang