blob: f34bc89cfb9f23f7e011d1dd302408fde67189c6 [file] [log] [blame]
//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
23/// \brief Returns if a token is an Objective-C selector name.
24///
25/// For example, "bar" is a selector name in [foo bar:(4 + 5)].
26static bool isObjCSelectorName(const AnnotatedToken &Tok) {
27 return Tok.is(tok::identifier) && !Tok.Children.empty() &&
28 Tok.Children[0].is(tok::colon) &&
29 Tok.Children[0].Type == TT_ObjCMethodExpr;
30}
31
32static bool isBinaryOperator(const AnnotatedToken &Tok) {
33 // Comma is a binary operator, but does not behave as such wrt. formatting.
34 return getPrecedence(Tok) > prec::Comma;
35}
36
Daniel Jasper01786732013-02-04 07:21:18 +000037// Returns the previous token ignoring comments.
38static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
39 const AnnotatedToken *PrevToken = Tok.Parent;
40 while (PrevToken != NULL && PrevToken->is(tok::comment))
41 PrevToken = PrevToken->Parent;
42 return PrevToken;
43}
44
45// Returns the next token ignoring comments.
46static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
47 if (Tok.Children.empty())
48 return NULL;
49 const AnnotatedToken *NextToken = &Tok.Children[0];
50 while (NextToken->is(tok::comment)) {
51 if (NextToken->Children.empty())
52 return NULL;
53 NextToken = &NextToken->Children[0];
54 }
55 return NextToken;
56}
57
Daniel Jasper32d28ee2013-01-29 21:01:14 +000058/// \brief A parser that gathers additional information about tokens.
59///
60/// The \c TokenAnnotator tries to matches parenthesis and square brakets and
61/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
62/// into template parameter lists.
63class AnnotatingParser {
64public:
Daniel Jasper01786732013-02-04 07:21:18 +000065 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line)
66 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
67 KeywordVirtualFound(false), ColonIsObjCMethodExpr(false),
68 ColonIsForRangeExpr(false), IsExpression(false),
69 LookForFunctionName(Line.MustBeDeclaration), BindingStrength(1) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +000070 }
71
72 /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
73 struct ObjCSelectorRAII {
74 AnnotatingParser &P;
75 bool ColonWasObjCMethodExpr;
76
77 ObjCSelectorRAII(AnnotatingParser &P)
78 : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {
79 }
80
81 ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
82
83 void markStart(AnnotatedToken &Left) {
84 P.ColonIsObjCMethodExpr = true;
85 Left.Type = TT_ObjCMethodExpr;
86 }
87
88 void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
89 };
90
Daniel Jasper01786732013-02-04 07:21:18 +000091 struct ScopedBindingStrengthIncrease {
92 AnnotatingParser &P;
93 unsigned Increase;
94
95 ScopedBindingStrengthIncrease(AnnotatingParser &P, unsigned Increase)
96 : P(P), Increase(Increase) {
97 P.BindingStrength += Increase;
98 }
99
100 ~ScopedBindingStrengthIncrease() { P.BindingStrength -= Increase; }
101 };
102
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000103 bool parseAngle() {
104 if (CurrentToken == NULL)
105 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000106 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000107 AnnotatedToken *Left = CurrentToken->Parent;
108 while (CurrentToken != NULL) {
109 if (CurrentToken->is(tok::greater)) {
110 Left->MatchingParen = CurrentToken;
111 CurrentToken->MatchingParen = Left;
112 CurrentToken->Type = TT_TemplateCloser;
113 next();
114 return true;
115 }
116 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
117 CurrentToken->is(tok::r_brace))
118 return false;
119 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
120 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
121 return false;
122 if (CurrentToken->is(tok::comma))
123 ++Left->ParameterCount;
124 if (!consumeToken())
125 return false;
126 }
127 return false;
128 }
129
130 bool parseParens(bool LookForDecls = false) {
131 if (CurrentToken == NULL)
132 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000133 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000134 bool StartsObjCMethodExpr = false;
135 AnnotatedToken *Left = CurrentToken->Parent;
136 if (CurrentToken->is(tok::caret)) {
137 // ^( starts a block.
138 Left->Type = TT_ObjCBlockLParen;
139 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
140 // @selector( starts a selector.
141 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
142 MaybeSel->Parent->is(tok::at)) {
143 StartsObjCMethodExpr = true;
144 }
145 }
146
147 ObjCSelectorRAII objCSelector(*this);
148 if (StartsObjCMethodExpr)
149 objCSelector.markStart(*Left);
150
151 while (CurrentToken != NULL) {
152 // LookForDecls is set when "if (" has been seen. Check for
153 // 'identifier' '*' 'identifier' followed by not '=' -- this
154 // '*' has to be a binary operator but determineStarAmpUsage() will
155 // categorize it as an unary operator, so set the right type here.
156 if (LookForDecls && !CurrentToken->Children.empty()) {
157 AnnotatedToken &Prev = *CurrentToken->Parent;
158 AnnotatedToken &Next = CurrentToken->Children[0];
159 if (Prev.Parent->is(tok::identifier) &&
160 (Prev.is(tok::star) || Prev.is(tok::amp)) &&
161 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
162 Prev.Type = TT_BinaryOperator;
163 LookForDecls = false;
164 }
165 }
166
167 if (CurrentToken->is(tok::r_paren)) {
168 Left->MatchingParen = CurrentToken;
169 CurrentToken->MatchingParen = Left;
170
171 if (StartsObjCMethodExpr)
172 objCSelector.markEnd(*CurrentToken);
173
174 next();
175 return true;
176 }
177 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
178 return false;
179 if (CurrentToken->is(tok::comma))
180 ++Left->ParameterCount;
181 if (!consumeToken())
182 return false;
183 }
184 return false;
185 }
186
187 bool parseSquare() {
188 if (!CurrentToken)
189 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000190 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000191
192 // A '[' could be an index subscript (after an indentifier or after
193 // ')' or ']'), or it could be the start of an Objective-C method
194 // expression.
195 AnnotatedToken *Left = CurrentToken->Parent;
196 bool StartsObjCMethodExpr =
197 !Left->Parent || Left->Parent->is(tok::colon) ||
198 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
199 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
200 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
201 prec::Unknown;
202
203 ObjCSelectorRAII objCSelector(*this);
204 if (StartsObjCMethodExpr)
205 objCSelector.markStart(*Left);
206
207 while (CurrentToken != NULL) {
208 if (CurrentToken->is(tok::r_square)) {
209 if (!CurrentToken->Children.empty() &&
210 CurrentToken->Children[0].is(tok::l_paren)) {
211 // An ObjC method call can't be followed by an open parenthesis.
212 // FIXME: Do we incorrectly label ":" with this?
213 StartsObjCMethodExpr = false;
214 Left->Type = TT_Unknown;
215 }
Daniel Jasper01786732013-02-04 07:21:18 +0000216 if (StartsObjCMethodExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000217 objCSelector.markEnd(*CurrentToken);
Daniel Jasper01786732013-02-04 07:21:18 +0000218 if (Left->Parent != NULL &&
219 (Left->Parent->is(tok::star) || Left->Parent->is(tok::amp)))
220 Left->Parent->Type = TT_BinaryOperator;
221 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000222 Left->MatchingParen = CurrentToken;
223 CurrentToken->MatchingParen = Left;
224 next();
225 return true;
226 }
227 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
228 return false;
229 if (CurrentToken->is(tok::comma))
230 ++Left->ParameterCount;
231 if (!consumeToken())
232 return false;
233 }
234 return false;
235 }
236
237 bool parseBrace() {
238 // Lines are fine to end with '{'.
239 if (CurrentToken == NULL)
240 return true;
Daniel Jasper01786732013-02-04 07:21:18 +0000241 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000242 AnnotatedToken *Left = CurrentToken->Parent;
243 while (CurrentToken != NULL) {
244 if (CurrentToken->is(tok::r_brace)) {
245 Left->MatchingParen = CurrentToken;
246 CurrentToken->MatchingParen = Left;
247 next();
248 return true;
249 }
250 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
251 return false;
Daniel Jasperf343cab2013-01-31 14:59:26 +0000252 if (CurrentToken->is(tok::comma))
253 ++Left->ParameterCount;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000254 if (!consumeToken())
255 return false;
256 }
257 return true;
258 }
259
260 bool parseConditional() {
261 while (CurrentToken != NULL) {
262 if (CurrentToken->is(tok::colon)) {
263 CurrentToken->Type = TT_ConditionalExpr;
264 next();
265 return true;
266 }
267 if (!consumeToken())
268 return false;
269 }
270 return false;
271 }
272
273 bool parseTemplateDeclaration() {
274 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
275 CurrentToken->Type = TT_TemplateOpener;
276 next();
277 if (!parseAngle())
278 return false;
279 CurrentToken->Parent->ClosesTemplateDeclaration = true;
280 return true;
281 }
282 return false;
283 }
284
285 bool consumeToken() {
286 AnnotatedToken *Tok = CurrentToken;
287 next();
288 switch (Tok->FormatTok.Tok.getKind()) {
289 case tok::plus:
290 case tok::minus:
291 // At the start of the line, +/- specific ObjectiveC method
292 // declarations.
293 if (Tok->Parent == NULL)
294 Tok->Type = TT_ObjCMethodSpecifier;
295 break;
296 case tok::colon:
297 // Colons from ?: are handled in parseConditional().
298 if (Tok->Parent->is(tok::r_paren))
299 Tok->Type = TT_CtorInitializerColon;
300 else if (ColonIsObjCMethodExpr)
301 Tok->Type = TT_ObjCMethodExpr;
302 else if (ColonIsForRangeExpr)
303 Tok->Type = TT_RangeBasedForLoopColon;
304 break;
305 case tok::kw_if:
306 case tok::kw_while:
307 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
308 next();
309 if (!parseParens(/*LookForDecls=*/ true))
310 return false;
311 }
312 break;
313 case tok::kw_for:
314 ColonIsForRangeExpr = true;
315 next();
316 if (!parseParens())
317 return false;
318 break;
319 case tok::l_paren:
320 if (!parseParens())
321 return false;
322 break;
323 case tok::l_square:
324 if (!parseSquare())
325 return false;
326 break;
327 case tok::l_brace:
328 if (!parseBrace())
329 return false;
330 break;
331 case tok::less:
332 if (parseAngle())
333 Tok->Type = TT_TemplateOpener;
334 else {
335 Tok->Type = TT_BinaryOperator;
336 CurrentToken = Tok;
337 next();
338 }
339 break;
340 case tok::r_paren:
341 case tok::r_square:
342 return false;
343 case tok::r_brace:
344 // Lines can start with '}'.
345 if (Tok->Parent != NULL)
346 return false;
347 break;
348 case tok::greater:
349 Tok->Type = TT_BinaryOperator;
350 break;
351 case tok::kw_operator:
352 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
353 CurrentToken->Type = TT_OverloadedOperator;
354 next();
355 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
356 CurrentToken->Type = TT_OverloadedOperator;
357 next();
358 }
359 } else {
360 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
361 CurrentToken->Type = TT_OverloadedOperator;
362 next();
363 }
364 }
365 break;
366 case tok::question:
367 parseConditional();
368 break;
369 case tok::kw_template:
370 parseTemplateDeclaration();
371 break;
372 default:
373 break;
374 }
375 return true;
376 }
377
378 void parseIncludeDirective() {
379 next();
380 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
381 next();
382 while (CurrentToken != NULL) {
383 if (CurrentToken->isNot(tok::comment) ||
384 !CurrentToken->Children.empty())
385 CurrentToken->Type = TT_ImplicitStringLiteral;
386 next();
387 }
388 } else {
389 while (CurrentToken != NULL) {
390 next();
391 }
392 }
393 }
394
395 void parseWarningOrError() {
396 next();
397 // We still want to format the whitespace left of the first token of the
398 // warning or error.
399 next();
400 while (CurrentToken != NULL) {
401 CurrentToken->Type = TT_ImplicitStringLiteral;
402 next();
403 }
404 }
405
406 void parsePreprocessorDirective() {
407 next();
408 if (CurrentToken == NULL)
409 return;
410 // Hashes in the middle of a line can lead to any strange token
411 // sequence.
412 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
413 return;
414 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
415 case tok::pp_include:
416 case tok::pp_import:
417 parseIncludeDirective();
418 break;
419 case tok::pp_error:
420 case tok::pp_warning:
421 parseWarningOrError();
422 break;
423 default:
424 break;
425 }
Daniel Jasper5b7e7b02013-02-05 09:34:14 +0000426 while (CurrentToken != NULL)
427 next();
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000428 }
429
430 LineType parseLine() {
431 int PeriodsAndArrows = 0;
432 bool CanBeBuilderTypeStmt = true;
433 if (CurrentToken->is(tok::hash)) {
434 parsePreprocessorDirective();
435 return LT_PreprocessorDirective;
436 }
437 while (CurrentToken != NULL) {
438 if (CurrentToken->is(tok::kw_virtual))
439 KeywordVirtualFound = true;
440 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
441 ++PeriodsAndArrows;
442 if (getPrecedence(*CurrentToken) > prec::Assignment &&
443 CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
444 CanBeBuilderTypeStmt = false;
445 if (!consumeToken())
446 return LT_Invalid;
447 }
448 if (KeywordVirtualFound)
449 return LT_VirtualFunctionDecl;
450
451 // Assume a builder-type call if there are 2 or more "." and "->".
452 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
453 return LT_BuilderTypeCall;
454
455 return LT_Other;
456 }
457
458 void next() {
Daniel Jasper01786732013-02-04 07:21:18 +0000459 if (CurrentToken != NULL) {
460 determineTokenType(*CurrentToken);
461 CurrentToken->BindingStrength = BindingStrength;
462 }
463
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000464 if (CurrentToken != NULL && !CurrentToken->Children.empty())
465 CurrentToken = &CurrentToken->Children[0];
466 else
467 CurrentToken = NULL;
468 }
469
470private:
Daniel Jasper01786732013-02-04 07:21:18 +0000471 SourceManager &SourceMgr;
472 Lexer &Lex;
473 AnnotatedLine &Line;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000474 AnnotatedToken *CurrentToken;
475 bool KeywordVirtualFound;
476 bool ColonIsObjCMethodExpr;
477 bool ColonIsForRangeExpr;
Daniel Jasper01786732013-02-04 07:21:18 +0000478 bool IsExpression;
479 bool LookForFunctionName;
480
481 unsigned BindingStrength;
482
483 void determineTokenType(AnnotatedToken &Current) {
484 if (getPrecedence(Current) == prec::Assignment) {
485 IsExpression = true;
486 AnnotatedToken *Previous = Current.Parent;
487 while (Previous != NULL) {
488 if (Previous->Type == TT_BinaryOperator &&
489 (Previous->is(tok::star) || Previous->is(tok::amp))) {
490 Previous->Type = TT_PointerOrReference;
491 }
492 Previous = Previous->Parent;
493 }
494 }
495 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
496 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
497 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
498 IsExpression = true;
499
500 if (Current.Type == TT_Unknown) {
501 if (LookForFunctionName && Current.is(tok::l_paren)) {
502 findFunctionName(&Current);
503 LookForFunctionName = false;
504 } else if (Current.is(tok::star) || Current.is(tok::amp)) {
505 Current.Type = determineStarAmpUsage(Current, IsExpression);
506 } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
507 Current.is(tok::caret)) {
508 Current.Type = determinePlusMinusCaretUsage(Current);
509 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
510 Current.Type = determineIncrementUsage(Current);
511 } else if (Current.is(tok::exclaim)) {
512 Current.Type = TT_UnaryOperator;
513 } else if (isBinaryOperator(Current)) {
514 Current.Type = TT_BinaryOperator;
515 } else if (Current.is(tok::comment)) {
516 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
517 Lex.getLangOpts()));
518 if (StringRef(Data).startswith("//"))
519 Current.Type = TT_LineComment;
520 else
521 Current.Type = TT_BlockComment;
522 } else if (Current.is(tok::r_paren) &&
523 (Current.Parent->Type == TT_PointerOrReference ||
524 Current.Parent->Type == TT_TemplateCloser) &&
525 (Current.Children.empty() ||
526 (Current.Children[0].isNot(tok::equal) &&
527 Current.Children[0].isNot(tok::semi) &&
528 Current.Children[0].isNot(tok::l_brace)))) {
529 // FIXME: We need to get smarter and understand more cases of casts.
530 Current.Type = TT_CastRParen;
531 } else if (Current.is(tok::at) && Current.Children.size()) {
532 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
533 case tok::objc_interface:
534 case tok::objc_implementation:
535 case tok::objc_protocol:
536 Current.Type = TT_ObjCDecl;
537 break;
538 case tok::objc_property:
539 Current.Type = TT_ObjCProperty;
540 break;
541 default:
542 break;
543 }
544 }
545 }
546 }
547
548 /// \brief Starting from \p Current, this searches backwards for an
549 /// identifier which could be the start of a function name and marks it.
550 void findFunctionName(AnnotatedToken *Current) {
551 AnnotatedToken *Parent = Current->Parent;
552 while (Parent != NULL && Parent->Parent != NULL) {
553 if (Parent->is(tok::identifier) &&
554 (Parent->Parent->is(tok::identifier) ||
555 Parent->Parent->Type == TT_PointerOrReference ||
556 Parent->Parent->Type == TT_TemplateCloser)) {
557 Parent->Type = TT_StartOfName;
558 break;
559 }
560 Parent = Parent->Parent;
561 }
562 }
563
564 /// \brief Return the type of the given token assuming it is * or &.
565 TokenType
566 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
567 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
568 if (PrevToken == NULL)
569 return TT_UnaryOperator;
570
571 const AnnotatedToken *NextToken = getNextToken(Tok);
572 if (NextToken == NULL)
573 return TT_Unknown;
574
575 if (NextToken->is(tok::l_square))
576 return TT_PointerOrReference;
577
578 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
579 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
580 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
581 PrevToken->Type == TT_BinaryOperator ||
582 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
583 return TT_UnaryOperator;
584
585 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
586 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
587 NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
588 NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
589 NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
590 NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
591 NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
592 return TT_BinaryOperator;
593
594 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
595 NextToken->is(tok::greater))
596 return TT_PointerOrReference;
597
598 // It is very unlikely that we are going to find a pointer or reference type
599 // definition on the RHS of an assignment.
600 if (IsExpression)
601 return TT_BinaryOperator;
602
603 return TT_PointerOrReference;
604 }
605
606 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
607 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
608 if (PrevToken == NULL)
609 return TT_UnaryOperator;
610
611 // Use heuristics to recognize unary operators.
612 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
613 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
614 PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
615 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
616 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
617 return TT_UnaryOperator;
618
619 // There can't be to consecutive binary operators.
620 if (PrevToken->Type == TT_BinaryOperator)
621 return TT_UnaryOperator;
622
623 // Fall back to marking the token as binary operator.
624 return TT_BinaryOperator;
625 }
626
627 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
628 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
629 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
630 if (PrevToken == NULL)
631 return TT_UnaryOperator;
632 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
633 PrevToken->is(tok::identifier))
634 return TT_TrailingUnaryOperator;
635
636 return TT_UnaryOperator;
637 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000638};
639
640void TokenAnnotator::annotate() {
Daniel Jasper01786732013-02-04 07:21:18 +0000641 AnnotatingParser Parser(SourceMgr, Lex, Line);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000642 Line.Type = Parser.parseLine();
643 if (Line.Type == LT_Invalid)
644 return;
645
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000646 if (Line.First.Type == TT_ObjCMethodSpecifier)
647 Line.Type = LT_ObjCMethodDecl;
648 else if (Line.First.Type == TT_ObjCDecl)
649 Line.Type = LT_ObjCDecl;
650 else if (Line.First.Type == TT_ObjCProperty)
651 Line.Type = LT_ObjCProperty;
652
653 Line.First.SpaceRequiredBefore = true;
654 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
655 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
656
657 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
658 if (!Line.First.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000659 calculateFormattingInformation(Line.First.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000660}
661
Daniel Jasper01786732013-02-04 07:21:18 +0000662void TokenAnnotator::calculateFormattingInformation(AnnotatedToken &Current) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000663 Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
664
665 if (Current.FormatTok.MustBreakBefore) {
666 Current.MustBreakBefore = true;
Daniel Jasper2752ff32013-02-04 07:32:14 +0000667 } else if (Current.Type == TT_LineComment) {
668 Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
669 } else if ((Current.Parent->is(tok::comment) &&
670 Current.FormatTok.NewlinesBefore > 0) ||
671 (Current.is(tok::string_literal) &&
672 Current.Parent->is(tok::string_literal))) {
673 Current.MustBreakBefore = true;
Daniel Jasperfa543ac2013-02-04 07:34:48 +0000674 } else if (Current.is(tok::lessless) && !Current.Children.empty() &&
675 Current.Parent->is(tok::string_literal) &&
676 Current.Children[0].is(tok::string_literal)) {
677 Current.MustBreakBefore = true;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000678 } else {
Daniel Jasper2752ff32013-02-04 07:32:14 +0000679 Current.MustBreakBefore = false;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000680 }
681 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
682 if (Current.MustBreakBefore)
683 Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
684 else
685 Current.TotalLength =
686 Current.Parent->TotalLength + Current.FormatTok.TokenLength +
687 (Current.SpaceRequiredBefore ? 1 : 0);
688 // FIXME: Only calculate this if CanBreakBefore is true once static
689 // initializers etc. are sorted out.
Daniel Jasper01786732013-02-04 07:21:18 +0000690 // FIXME: Move magic numbers to a better place.
691 Current.SplitPenalty = 20 * Current.BindingStrength + splitPenalty(Current);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000692 if (!Current.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000693 calculateFormattingInformation(Current.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000694}
695
696unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
697 const AnnotatedToken &Left = *Tok.Parent;
698 const AnnotatedToken &Right = Tok;
699
700 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
701 return 50;
702 if (Left.is(tok::equal) && Right.is(tok::l_brace))
703 return 150;
704 if (Left.is(tok::coloncolon))
705 return 500;
706
707 if (Left.Type == TT_RangeBasedForLoopColon)
708 return 5;
709
710 if (Right.is(tok::arrow) || Right.is(tok::period)) {
711 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
712 return 5; // Should be smaller than breaking at a nested comma.
713 return 150;
714 }
715
716 // In for-loops, prefer breaking at ',' and ';'.
717 if (Line.First.is(tok::kw_for) &&
718 (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
719 return 20;
720
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000721 if (Left.is(tok::semi))
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000722 return 0;
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000723 if (Left.is(tok::comma))
724 return 1;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000725
726 // In Objective-C method expressions, prefer breaking before "param:" over
727 // breaking after it.
728 if (isObjCSelectorName(Right))
729 return 0;
730 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
731 return 20;
732
Daniel Jasper01786732013-02-04 07:21:18 +0000733 if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
734 Left.Type == TT_TemplateOpener)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000735 return 20;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000736
Daniel Jasper01786732013-02-04 07:21:18 +0000737 if (Right.is(tok::lessless))
738 return prec::Shift;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000739 if (Left.Type == TT_ConditionalExpr)
740 return prec::Assignment;
741 prec::Level Level = getPrecedence(Left);
742
743 if (Level != prec::Unknown)
744 return Level;
745
746 return 3;
747}
748
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000749bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
750 const AnnotatedToken &Right) {
751 if (Right.is(tok::hashhash))
752 return Left.is(tok::hash);
753 if (Left.is(tok::hashhash) || Left.is(tok::hash))
754 return Right.is(tok::hash);
755 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
756 return false;
757 if (Right.is(tok::less) &&
758 (Left.is(tok::kw_template) ||
759 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
760 return true;
761 if (Left.is(tok::arrow) || Right.is(tok::arrow))
762 return false;
763 if (Left.is(tok::exclaim) || Left.is(tok::tilde))
764 return false;
765 if (Left.is(tok::at) &&
766 (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
767 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
768 Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
769 Right.is(tok::kw_true) || Right.is(tok::kw_false)))
770 return false;
771 if (Left.is(tok::coloncolon))
772 return false;
773 if (Right.is(tok::coloncolon))
774 return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
775 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
776 return false;
777 if (Right.is(tok::amp) || Right.is(tok::star))
778 return Left.FormatTok.Tok.isLiteral() ||
779 (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
780 !Style.PointerAndReferenceBindToType);
781 if (Left.is(tok::amp) || Left.is(tok::star))
782 return Right.FormatTok.Tok.isLiteral() ||
783 Style.PointerAndReferenceBindToType;
784 if (Right.is(tok::star) && Left.is(tok::l_paren))
785 return false;
786 if (Left.is(tok::l_square) || Right.is(tok::r_square))
787 return false;
788 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
789 return false;
790 if (Left.is(tok::period) || Right.is(tok::period))
791 return false;
792 if (Left.is(tok::colon))
793 return Left.Type != TT_ObjCMethodExpr;
794 if (Right.is(tok::colon))
795 return Right.Type != TT_ObjCMethodExpr;
796 if (Left.is(tok::l_paren))
797 return false;
798 if (Right.is(tok::l_paren)) {
799 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
800 Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
801 Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
802 Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
803 Left.is(tok::kw_delete);
804 }
805 if (Left.is(tok::at) &&
806 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
807 return false;
808 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
809 return false;
810 return true;
811}
812
813bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
814 if (Line.Type == LT_ObjCMethodDecl) {
815 if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
816 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
817 return true;
818 if (Tok.is(tok::colon))
819 return false;
820 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
821 return true;
822 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
823 // Don't space between ')' and <id>
824 return false;
825 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
826 // Don't space between ':' and '('
827 return false;
828 }
829 if (Line.Type == LT_ObjCProperty &&
830 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
831 return false;
832
833 if (Tok.Parent->is(tok::comma))
834 return true;
835 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
836 return true;
837 if (Tok.Type == TT_OverloadedOperator)
838 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
839 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
840 if (Tok.Parent->Type == TT_OverloadedOperator)
841 return false;
842 if (Tok.is(tok::colon))
843 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
844 Tok.Type != TT_ObjCMethodExpr;
845 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
846 return false;
847 if (Tok.Type == TT_UnaryOperator)
848 return Tok.Parent->isNot(tok::l_paren) &&
849 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
850 (Tok.Parent->isNot(tok::colon) ||
851 Tok.Parent->Type != TT_ObjCMethodExpr);
852 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
853 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
854 TT_TemplateCloser && Style.SplitTemplateClosingGreater;
855 }
856 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
857 return true;
858 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
859 return false;
860 if (Tok.is(tok::less) && Line.First.is(tok::hash))
861 return true;
862 if (Tok.Type == TT_TrailingUnaryOperator)
863 return false;
864 return spaceRequiredBetween(*Tok.Parent, Tok);
865}
866
867bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
868 const AnnotatedToken &Left = *Right.Parent;
869 if (Line.Type == LT_ObjCMethodDecl) {
870 if (Right.is(tok::identifier) && !Right.Children.empty() &&
871 Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
872 return true;
873 if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
874 Left.Parent->is(tok::colon))
875 // Don't break this identifier as ':' or identifier
876 // before it will break.
877 return false;
878 if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
879 // Don't break at ':' if identifier before it can beak.
880 return false;
881 }
882 if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
883 return true;
884 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
885 return false;
886 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
887 return true;
888 if (isObjCSelectorName(Right))
889 return true;
890 if (Left.ClosesTemplateDeclaration)
891 return true;
892 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
893 return true;
894 if (Left.Type == TT_RangeBasedForLoopColon)
895 return true;
896 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
897 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
898 Left.is(tok::question))
899 return false;
900 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
901 return false;
902
903 if (Right.Type == TT_LineComment)
904 // We rely on MustBreakBefore being set correctly here as we should not
905 // change the "binding" behavior of a comment.
906 return false;
907
908 // Allow breaking after a trailing 'const', e.g. after a method declaration,
909 // unless it is follow by ';', '{' or '='.
910 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
911 Left.Parent->is(tok::r_paren))
912 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
913 Right.isNot(tok::equal);
914
915 // We only break before r_brace if there was a corresponding break before
916 // the l_brace, which is tracked by BreakBeforeClosingBrace.
917 if (Right.is(tok::r_brace))
918 return false;
919
920 if (Right.is(tok::r_paren) || Right.is(tok::greater))
921 return false;
922 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
923 Left.is(tok::comma) || Right.is(tok::lessless) ||
924 Right.is(tok::arrow) || Right.is(tok::period) ||
925 Right.is(tok::colon) || Left.is(tok::coloncolon) ||
926 Left.is(tok::semi) || Left.is(tok::l_brace) ||
927 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
928 Right.is(tok::identifier)) ||
929 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
930 (Left.is(tok::l_square) && !Right.is(tok::r_square));
931}
932
933} // namespace format
934} // namespace clang