blob: 0676df6c0e3922e900f22cf754a2fb2130617e92 [file] [log] [blame]
Daniel Jasper32d28ee2013-01-29 21:01:14 +00001//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
23/// \brief Returns if a token is an Objective-C selector name.
24///
25/// For example, "bar" is a selector name in [foo bar:(4 + 5)].
26static bool isObjCSelectorName(const AnnotatedToken &Tok) {
27 return Tok.is(tok::identifier) && !Tok.Children.empty() &&
28 Tok.Children[0].is(tok::colon) &&
29 Tok.Children[0].Type == TT_ObjCMethodExpr;
30}
31
32static bool isBinaryOperator(const AnnotatedToken &Tok) {
33 // Comma is a binary operator, but does not behave as such wrt. formatting.
34 return getPrecedence(Tok) > prec::Comma;
35}
36
Daniel Jasper01786732013-02-04 07:21:18 +000037// Returns the previous token ignoring comments.
38static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
39 const AnnotatedToken *PrevToken = Tok.Parent;
40 while (PrevToken != NULL && PrevToken->is(tok::comment))
41 PrevToken = PrevToken->Parent;
42 return PrevToken;
43}
44
45// Returns the next token ignoring comments.
46static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
47 if (Tok.Children.empty())
48 return NULL;
49 const AnnotatedToken *NextToken = &Tok.Children[0];
50 while (NextToken->is(tok::comment)) {
51 if (NextToken->Children.empty())
52 return NULL;
53 NextToken = &NextToken->Children[0];
54 }
55 return NextToken;
56}
57
/// \brief A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// store the parenthesis levels. It also tries to resolve matching "<" and ">"
/// into template parameter lists.
63class AnnotatingParser {
64public:
Daniel Jasper01786732013-02-04 07:21:18 +000065 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line)
66 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
67 KeywordVirtualFound(false), ColonIsObjCMethodExpr(false),
68 ColonIsForRangeExpr(false), IsExpression(false),
69 LookForFunctionName(Line.MustBeDeclaration), BindingStrength(1) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +000070 }
71
72 /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
73 struct ObjCSelectorRAII {
74 AnnotatingParser &P;
75 bool ColonWasObjCMethodExpr;
76
77 ObjCSelectorRAII(AnnotatingParser &P)
78 : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {
79 }
80
81 ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
82
83 void markStart(AnnotatedToken &Left) {
84 P.ColonIsObjCMethodExpr = true;
85 Left.Type = TT_ObjCMethodExpr;
86 }
87
88 void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
89 };
90
Daniel Jasper01786732013-02-04 07:21:18 +000091 struct ScopedBindingStrengthIncrease {
92 AnnotatingParser &P;
93 unsigned Increase;
94
95 ScopedBindingStrengthIncrease(AnnotatingParser &P, unsigned Increase)
96 : P(P), Increase(Increase) {
97 P.BindingStrength += Increase;
98 }
99
100 ~ScopedBindingStrengthIncrease() { P.BindingStrength -= Increase; }
101 };
102
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000103 bool parseAngle() {
104 if (CurrentToken == NULL)
105 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000106 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000107 AnnotatedToken *Left = CurrentToken->Parent;
108 while (CurrentToken != NULL) {
109 if (CurrentToken->is(tok::greater)) {
110 Left->MatchingParen = CurrentToken;
111 CurrentToken->MatchingParen = Left;
112 CurrentToken->Type = TT_TemplateCloser;
113 next();
114 return true;
115 }
116 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
117 CurrentToken->is(tok::r_brace))
118 return false;
119 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
120 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
121 return false;
122 if (CurrentToken->is(tok::comma))
123 ++Left->ParameterCount;
124 if (!consumeToken())
125 return false;
126 }
127 return false;
128 }
129
130 bool parseParens(bool LookForDecls = false) {
131 if (CurrentToken == NULL)
132 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000133 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000134 bool StartsObjCMethodExpr = false;
135 AnnotatedToken *Left = CurrentToken->Parent;
136 if (CurrentToken->is(tok::caret)) {
137 // ^( starts a block.
138 Left->Type = TT_ObjCBlockLParen;
139 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
140 // @selector( starts a selector.
141 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
142 MaybeSel->Parent->is(tok::at)) {
143 StartsObjCMethodExpr = true;
144 }
145 }
146
147 ObjCSelectorRAII objCSelector(*this);
148 if (StartsObjCMethodExpr)
149 objCSelector.markStart(*Left);
150
151 while (CurrentToken != NULL) {
152 // LookForDecls is set when "if (" has been seen. Check for
153 // 'identifier' '*' 'identifier' followed by not '=' -- this
154 // '*' has to be a binary operator but determineStarAmpUsage() will
155 // categorize it as an unary operator, so set the right type here.
156 if (LookForDecls && !CurrentToken->Children.empty()) {
157 AnnotatedToken &Prev = *CurrentToken->Parent;
158 AnnotatedToken &Next = CurrentToken->Children[0];
159 if (Prev.Parent->is(tok::identifier) &&
160 (Prev.is(tok::star) || Prev.is(tok::amp)) &&
161 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
162 Prev.Type = TT_BinaryOperator;
163 LookForDecls = false;
164 }
165 }
166
167 if (CurrentToken->is(tok::r_paren)) {
168 Left->MatchingParen = CurrentToken;
169 CurrentToken->MatchingParen = Left;
170
171 if (StartsObjCMethodExpr)
172 objCSelector.markEnd(*CurrentToken);
173
174 next();
175 return true;
176 }
177 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
178 return false;
179 if (CurrentToken->is(tok::comma))
180 ++Left->ParameterCount;
181 if (!consumeToken())
182 return false;
183 }
184 return false;
185 }
186
187 bool parseSquare() {
188 if (!CurrentToken)
189 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000190 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000191
192 // A '[' could be an index subscript (after an indentifier or after
193 // ')' or ']'), or it could be the start of an Objective-C method
194 // expression.
195 AnnotatedToken *Left = CurrentToken->Parent;
196 bool StartsObjCMethodExpr =
197 !Left->Parent || Left->Parent->is(tok::colon) ||
198 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
199 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
200 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
201 prec::Unknown;
202
203 ObjCSelectorRAII objCSelector(*this);
204 if (StartsObjCMethodExpr)
205 objCSelector.markStart(*Left);
206
207 while (CurrentToken != NULL) {
208 if (CurrentToken->is(tok::r_square)) {
209 if (!CurrentToken->Children.empty() &&
210 CurrentToken->Children[0].is(tok::l_paren)) {
211 // An ObjC method call can't be followed by an open parenthesis.
212 // FIXME: Do we incorrectly label ":" with this?
213 StartsObjCMethodExpr = false;
214 Left->Type = TT_Unknown;
215 }
Daniel Jasper01786732013-02-04 07:21:18 +0000216 if (StartsObjCMethodExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000217 objCSelector.markEnd(*CurrentToken);
Daniel Jasper01786732013-02-04 07:21:18 +0000218 if (Left->Parent != NULL &&
219 (Left->Parent->is(tok::star) || Left->Parent->is(tok::amp)))
220 Left->Parent->Type = TT_BinaryOperator;
221 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000222 Left->MatchingParen = CurrentToken;
223 CurrentToken->MatchingParen = Left;
224 next();
225 return true;
226 }
227 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
228 return false;
229 if (CurrentToken->is(tok::comma))
230 ++Left->ParameterCount;
231 if (!consumeToken())
232 return false;
233 }
234 return false;
235 }
236
237 bool parseBrace() {
238 // Lines are fine to end with '{'.
239 if (CurrentToken == NULL)
240 return true;
Daniel Jasper01786732013-02-04 07:21:18 +0000241 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000242 AnnotatedToken *Left = CurrentToken->Parent;
243 while (CurrentToken != NULL) {
244 if (CurrentToken->is(tok::r_brace)) {
245 Left->MatchingParen = CurrentToken;
246 CurrentToken->MatchingParen = Left;
247 next();
248 return true;
249 }
250 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
251 return false;
Daniel Jasperf343cab2013-01-31 14:59:26 +0000252 if (CurrentToken->is(tok::comma))
253 ++Left->ParameterCount;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000254 if (!consumeToken())
255 return false;
256 }
257 return true;
258 }
259
260 bool parseConditional() {
261 while (CurrentToken != NULL) {
262 if (CurrentToken->is(tok::colon)) {
263 CurrentToken->Type = TT_ConditionalExpr;
264 next();
265 return true;
266 }
267 if (!consumeToken())
268 return false;
269 }
270 return false;
271 }
272
273 bool parseTemplateDeclaration() {
274 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
275 CurrentToken->Type = TT_TemplateOpener;
276 next();
277 if (!parseAngle())
278 return false;
279 CurrentToken->Parent->ClosesTemplateDeclaration = true;
280 return true;
281 }
282 return false;
283 }
284
285 bool consumeToken() {
286 AnnotatedToken *Tok = CurrentToken;
287 next();
288 switch (Tok->FormatTok.Tok.getKind()) {
289 case tok::plus:
290 case tok::minus:
291 // At the start of the line, +/- specific ObjectiveC method
292 // declarations.
293 if (Tok->Parent == NULL)
294 Tok->Type = TT_ObjCMethodSpecifier;
295 break;
296 case tok::colon:
297 // Colons from ?: are handled in parseConditional().
298 if (Tok->Parent->is(tok::r_paren))
299 Tok->Type = TT_CtorInitializerColon;
300 else if (ColonIsObjCMethodExpr)
301 Tok->Type = TT_ObjCMethodExpr;
302 else if (ColonIsForRangeExpr)
303 Tok->Type = TT_RangeBasedForLoopColon;
304 break;
305 case tok::kw_if:
306 case tok::kw_while:
307 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
308 next();
309 if (!parseParens(/*LookForDecls=*/ true))
310 return false;
311 }
312 break;
313 case tok::kw_for:
314 ColonIsForRangeExpr = true;
315 next();
316 if (!parseParens())
317 return false;
318 break;
319 case tok::l_paren:
320 if (!parseParens())
321 return false;
322 break;
323 case tok::l_square:
324 if (!parseSquare())
325 return false;
326 break;
327 case tok::l_brace:
328 if (!parseBrace())
329 return false;
330 break;
331 case tok::less:
332 if (parseAngle())
333 Tok->Type = TT_TemplateOpener;
334 else {
335 Tok->Type = TT_BinaryOperator;
336 CurrentToken = Tok;
337 next();
338 }
339 break;
340 case tok::r_paren:
341 case tok::r_square:
342 return false;
343 case tok::r_brace:
344 // Lines can start with '}'.
345 if (Tok->Parent != NULL)
346 return false;
347 break;
348 case tok::greater:
349 Tok->Type = TT_BinaryOperator;
350 break;
351 case tok::kw_operator:
352 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
353 CurrentToken->Type = TT_OverloadedOperator;
354 next();
355 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
356 CurrentToken->Type = TT_OverloadedOperator;
357 next();
358 }
359 } else {
360 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
361 CurrentToken->Type = TT_OverloadedOperator;
362 next();
363 }
364 }
365 break;
366 case tok::question:
367 parseConditional();
368 break;
369 case tok::kw_template:
370 parseTemplateDeclaration();
371 break;
372 default:
373 break;
374 }
375 return true;
376 }
377
378 void parseIncludeDirective() {
379 next();
380 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
381 next();
382 while (CurrentToken != NULL) {
383 if (CurrentToken->isNot(tok::comment) ||
384 !CurrentToken->Children.empty())
385 CurrentToken->Type = TT_ImplicitStringLiteral;
386 next();
387 }
388 } else {
389 while (CurrentToken != NULL) {
390 next();
391 }
392 }
393 }
394
395 void parseWarningOrError() {
396 next();
397 // We still want to format the whitespace left of the first token of the
398 // warning or error.
399 next();
400 while (CurrentToken != NULL) {
401 CurrentToken->Type = TT_ImplicitStringLiteral;
402 next();
403 }
404 }
405
406 void parsePreprocessorDirective() {
407 next();
408 if (CurrentToken == NULL)
409 return;
410 // Hashes in the middle of a line can lead to any strange token
411 // sequence.
412 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
413 return;
414 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
415 case tok::pp_include:
416 case tok::pp_import:
417 parseIncludeDirective();
418 break;
419 case tok::pp_error:
420 case tok::pp_warning:
421 parseWarningOrError();
422 break;
423 default:
424 break;
425 }
426 }
427
428 LineType parseLine() {
429 int PeriodsAndArrows = 0;
430 bool CanBeBuilderTypeStmt = true;
431 if (CurrentToken->is(tok::hash)) {
432 parsePreprocessorDirective();
433 return LT_PreprocessorDirective;
434 }
435 while (CurrentToken != NULL) {
436 if (CurrentToken->is(tok::kw_virtual))
437 KeywordVirtualFound = true;
438 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
439 ++PeriodsAndArrows;
440 if (getPrecedence(*CurrentToken) > prec::Assignment &&
441 CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
442 CanBeBuilderTypeStmt = false;
443 if (!consumeToken())
444 return LT_Invalid;
445 }
446 if (KeywordVirtualFound)
447 return LT_VirtualFunctionDecl;
448
449 // Assume a builder-type call if there are 2 or more "." and "->".
450 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
451 return LT_BuilderTypeCall;
452
453 return LT_Other;
454 }
455
456 void next() {
Daniel Jasper01786732013-02-04 07:21:18 +0000457 if (CurrentToken != NULL) {
458 determineTokenType(*CurrentToken);
459 CurrentToken->BindingStrength = BindingStrength;
460 }
461
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000462 if (CurrentToken != NULL && !CurrentToken->Children.empty())
463 CurrentToken = &CurrentToken->Children[0];
464 else
465 CurrentToken = NULL;
466 }
467
468private:
Daniel Jasper01786732013-02-04 07:21:18 +0000469 SourceManager &SourceMgr;
470 Lexer &Lex;
471 AnnotatedLine &Line;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000472 AnnotatedToken *CurrentToken;
473 bool KeywordVirtualFound;
474 bool ColonIsObjCMethodExpr;
475 bool ColonIsForRangeExpr;
Daniel Jasper01786732013-02-04 07:21:18 +0000476 bool IsExpression;
477 bool LookForFunctionName;
478
479 unsigned BindingStrength;
480
481 void determineTokenType(AnnotatedToken &Current) {
482 if (getPrecedence(Current) == prec::Assignment) {
483 IsExpression = true;
484 AnnotatedToken *Previous = Current.Parent;
485 while (Previous != NULL) {
486 if (Previous->Type == TT_BinaryOperator &&
487 (Previous->is(tok::star) || Previous->is(tok::amp))) {
488 Previous->Type = TT_PointerOrReference;
489 }
490 Previous = Previous->Parent;
491 }
492 }
493 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
494 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
495 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
496 IsExpression = true;
497
498 if (Current.Type == TT_Unknown) {
499 if (LookForFunctionName && Current.is(tok::l_paren)) {
500 findFunctionName(&Current);
501 LookForFunctionName = false;
502 } else if (Current.is(tok::star) || Current.is(tok::amp)) {
503 Current.Type = determineStarAmpUsage(Current, IsExpression);
504 } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
505 Current.is(tok::caret)) {
506 Current.Type = determinePlusMinusCaretUsage(Current);
507 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
508 Current.Type = determineIncrementUsage(Current);
509 } else if (Current.is(tok::exclaim)) {
510 Current.Type = TT_UnaryOperator;
511 } else if (isBinaryOperator(Current)) {
512 Current.Type = TT_BinaryOperator;
513 } else if (Current.is(tok::comment)) {
514 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
515 Lex.getLangOpts()));
516 if (StringRef(Data).startswith("//"))
517 Current.Type = TT_LineComment;
518 else
519 Current.Type = TT_BlockComment;
520 } else if (Current.is(tok::r_paren) &&
521 (Current.Parent->Type == TT_PointerOrReference ||
522 Current.Parent->Type == TT_TemplateCloser) &&
523 (Current.Children.empty() ||
524 (Current.Children[0].isNot(tok::equal) &&
525 Current.Children[0].isNot(tok::semi) &&
526 Current.Children[0].isNot(tok::l_brace)))) {
527 // FIXME: We need to get smarter and understand more cases of casts.
528 Current.Type = TT_CastRParen;
529 } else if (Current.is(tok::at) && Current.Children.size()) {
530 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
531 case tok::objc_interface:
532 case tok::objc_implementation:
533 case tok::objc_protocol:
534 Current.Type = TT_ObjCDecl;
535 break;
536 case tok::objc_property:
537 Current.Type = TT_ObjCProperty;
538 break;
539 default:
540 break;
541 }
542 }
543 }
544 }
545
546 /// \brief Starting from \p Current, this searches backwards for an
547 /// identifier which could be the start of a function name and marks it.
548 void findFunctionName(AnnotatedToken *Current) {
549 AnnotatedToken *Parent = Current->Parent;
550 while (Parent != NULL && Parent->Parent != NULL) {
551 if (Parent->is(tok::identifier) &&
552 (Parent->Parent->is(tok::identifier) ||
553 Parent->Parent->Type == TT_PointerOrReference ||
554 Parent->Parent->Type == TT_TemplateCloser)) {
555 Parent->Type = TT_StartOfName;
556 break;
557 }
558 Parent = Parent->Parent;
559 }
560 }
561
562 /// \brief Return the type of the given token assuming it is * or &.
563 TokenType
564 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
565 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
566 if (PrevToken == NULL)
567 return TT_UnaryOperator;
568
569 const AnnotatedToken *NextToken = getNextToken(Tok);
570 if (NextToken == NULL)
571 return TT_Unknown;
572
573 if (NextToken->is(tok::l_square))
574 return TT_PointerOrReference;
575
576 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
577 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
578 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
579 PrevToken->Type == TT_BinaryOperator ||
580 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
581 return TT_UnaryOperator;
582
583 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
584 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
585 NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
586 NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
587 NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
588 NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
589 NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
590 return TT_BinaryOperator;
591
592 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
593 NextToken->is(tok::greater))
594 return TT_PointerOrReference;
595
596 // It is very unlikely that we are going to find a pointer or reference type
597 // definition on the RHS of an assignment.
598 if (IsExpression)
599 return TT_BinaryOperator;
600
601 return TT_PointerOrReference;
602 }
603
604 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
605 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
606 if (PrevToken == NULL)
607 return TT_UnaryOperator;
608
609 // Use heuristics to recognize unary operators.
610 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
611 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
612 PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
613 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
614 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
615 return TT_UnaryOperator;
616
617 // There can't be to consecutive binary operators.
618 if (PrevToken->Type == TT_BinaryOperator)
619 return TT_UnaryOperator;
620
621 // Fall back to marking the token as binary operator.
622 return TT_BinaryOperator;
623 }
624
625 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
626 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
627 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
628 if (PrevToken == NULL)
629 return TT_UnaryOperator;
630 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
631 PrevToken->is(tok::identifier))
632 return TT_TrailingUnaryOperator;
633
634 return TT_UnaryOperator;
635 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000636};
637
638void TokenAnnotator::annotate() {
Daniel Jasper01786732013-02-04 07:21:18 +0000639 AnnotatingParser Parser(SourceMgr, Lex, Line);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000640 Line.Type = Parser.parseLine();
641 if (Line.Type == LT_Invalid)
642 return;
643
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000644 if (Line.First.Type == TT_ObjCMethodSpecifier)
645 Line.Type = LT_ObjCMethodDecl;
646 else if (Line.First.Type == TT_ObjCDecl)
647 Line.Type = LT_ObjCDecl;
648 else if (Line.First.Type == TT_ObjCProperty)
649 Line.Type = LT_ObjCProperty;
650
651 Line.First.SpaceRequiredBefore = true;
652 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
653 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
654
655 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
656 if (!Line.First.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000657 calculateFormattingInformation(Line.First.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000658}
659
Daniel Jasper01786732013-02-04 07:21:18 +0000660void TokenAnnotator::calculateFormattingInformation(AnnotatedToken &Current) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000661 Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
662
663 if (Current.FormatTok.MustBreakBefore) {
664 Current.MustBreakBefore = true;
665 } else {
666 if (Current.Type == TT_LineComment) {
667 Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
668 } else if ((Current.Parent->is(tok::comment) &&
669 Current.FormatTok.NewlinesBefore > 0) ||
670 (Current.is(tok::string_literal) &&
671 Current.Parent->is(tok::string_literal))) {
672 Current.MustBreakBefore = true;
673 } else {
674 Current.MustBreakBefore = false;
675 }
676 }
677 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
678 if (Current.MustBreakBefore)
679 Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
680 else
681 Current.TotalLength =
682 Current.Parent->TotalLength + Current.FormatTok.TokenLength +
683 (Current.SpaceRequiredBefore ? 1 : 0);
684 // FIXME: Only calculate this if CanBreakBefore is true once static
685 // initializers etc. are sorted out.
Daniel Jasper01786732013-02-04 07:21:18 +0000686 // FIXME: Move magic numbers to a better place.
687 Current.SplitPenalty = 20 * Current.BindingStrength + splitPenalty(Current);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000688 if (!Current.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000689 calculateFormattingInformation(Current.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000690}
691
692unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
693 const AnnotatedToken &Left = *Tok.Parent;
694 const AnnotatedToken &Right = Tok;
695
696 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
697 return 50;
698 if (Left.is(tok::equal) && Right.is(tok::l_brace))
699 return 150;
700 if (Left.is(tok::coloncolon))
701 return 500;
702
703 if (Left.Type == TT_RangeBasedForLoopColon)
704 return 5;
705
706 if (Right.is(tok::arrow) || Right.is(tok::period)) {
707 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
708 return 5; // Should be smaller than breaking at a nested comma.
709 return 150;
710 }
711
712 // In for-loops, prefer breaking at ',' and ';'.
713 if (Line.First.is(tok::kw_for) &&
714 (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
715 return 20;
716
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000717 if (Left.is(tok::semi))
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000718 return 0;
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000719 if (Left.is(tok::comma))
720 return 1;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000721
722 // In Objective-C method expressions, prefer breaking before "param:" over
723 // breaking after it.
724 if (isObjCSelectorName(Right))
725 return 0;
726 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
727 return 20;
728
Daniel Jasper01786732013-02-04 07:21:18 +0000729 if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
730 Left.Type == TT_TemplateOpener)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000731 return 20;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000732
Daniel Jasper01786732013-02-04 07:21:18 +0000733 if (Right.is(tok::lessless))
734 return prec::Shift;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000735 if (Left.Type == TT_ConditionalExpr)
736 return prec::Assignment;
737 prec::Level Level = getPrecedence(Left);
738
739 if (Level != prec::Unknown)
740 return Level;
741
742 return 3;
743}
744
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000745bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
746 const AnnotatedToken &Right) {
747 if (Right.is(tok::hashhash))
748 return Left.is(tok::hash);
749 if (Left.is(tok::hashhash) || Left.is(tok::hash))
750 return Right.is(tok::hash);
751 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
752 return false;
753 if (Right.is(tok::less) &&
754 (Left.is(tok::kw_template) ||
755 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
756 return true;
757 if (Left.is(tok::arrow) || Right.is(tok::arrow))
758 return false;
759 if (Left.is(tok::exclaim) || Left.is(tok::tilde))
760 return false;
761 if (Left.is(tok::at) &&
762 (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
763 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
764 Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
765 Right.is(tok::kw_true) || Right.is(tok::kw_false)))
766 return false;
767 if (Left.is(tok::coloncolon))
768 return false;
769 if (Right.is(tok::coloncolon))
770 return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
771 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
772 return false;
773 if (Right.is(tok::amp) || Right.is(tok::star))
774 return Left.FormatTok.Tok.isLiteral() ||
775 (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
776 !Style.PointerAndReferenceBindToType);
777 if (Left.is(tok::amp) || Left.is(tok::star))
778 return Right.FormatTok.Tok.isLiteral() ||
779 Style.PointerAndReferenceBindToType;
780 if (Right.is(tok::star) && Left.is(tok::l_paren))
781 return false;
782 if (Left.is(tok::l_square) || Right.is(tok::r_square))
783 return false;
784 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
785 return false;
786 if (Left.is(tok::period) || Right.is(tok::period))
787 return false;
788 if (Left.is(tok::colon))
789 return Left.Type != TT_ObjCMethodExpr;
790 if (Right.is(tok::colon))
791 return Right.Type != TT_ObjCMethodExpr;
792 if (Left.is(tok::l_paren))
793 return false;
794 if (Right.is(tok::l_paren)) {
795 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
796 Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
797 Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
798 Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
799 Left.is(tok::kw_delete);
800 }
801 if (Left.is(tok::at) &&
802 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
803 return false;
804 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
805 return false;
806 return true;
807}
808
809bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
810 if (Line.Type == LT_ObjCMethodDecl) {
811 if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
812 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
813 return true;
814 if (Tok.is(tok::colon))
815 return false;
816 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
817 return true;
818 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
819 // Don't space between ')' and <id>
820 return false;
821 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
822 // Don't space between ':' and '('
823 return false;
824 }
825 if (Line.Type == LT_ObjCProperty &&
826 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
827 return false;
828
829 if (Tok.Parent->is(tok::comma))
830 return true;
831 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
832 return true;
833 if (Tok.Type == TT_OverloadedOperator)
834 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
835 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
836 if (Tok.Parent->Type == TT_OverloadedOperator)
837 return false;
838 if (Tok.is(tok::colon))
839 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
840 Tok.Type != TT_ObjCMethodExpr;
841 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
842 return false;
843 if (Tok.Type == TT_UnaryOperator)
844 return Tok.Parent->isNot(tok::l_paren) &&
845 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
846 (Tok.Parent->isNot(tok::colon) ||
847 Tok.Parent->Type != TT_ObjCMethodExpr);
848 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
849 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
850 TT_TemplateCloser && Style.SplitTemplateClosingGreater;
851 }
852 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
853 return true;
854 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
855 return false;
856 if (Tok.is(tok::less) && Line.First.is(tok::hash))
857 return true;
858 if (Tok.Type == TT_TrailingUnaryOperator)
859 return false;
860 return spaceRequiredBetween(*Tok.Parent, Tok);
861}
862
863bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
864 const AnnotatedToken &Left = *Right.Parent;
865 if (Line.Type == LT_ObjCMethodDecl) {
866 if (Right.is(tok::identifier) && !Right.Children.empty() &&
867 Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
868 return true;
869 if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
870 Left.Parent->is(tok::colon))
871 // Don't break this identifier as ':' or identifier
872 // before it will break.
873 return false;
874 if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
875 // Don't break at ':' if identifier before it can beak.
876 return false;
877 }
878 if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
879 return true;
880 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
881 return false;
882 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
883 return true;
884 if (isObjCSelectorName(Right))
885 return true;
886 if (Left.ClosesTemplateDeclaration)
887 return true;
888 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
889 return true;
890 if (Left.Type == TT_RangeBasedForLoopColon)
891 return true;
892 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
893 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
894 Left.is(tok::question))
895 return false;
896 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
897 return false;
898
899 if (Right.Type == TT_LineComment)
900 // We rely on MustBreakBefore being set correctly here as we should not
901 // change the "binding" behavior of a comment.
902 return false;
903
904 // Allow breaking after a trailing 'const', e.g. after a method declaration,
905 // unless it is follow by ';', '{' or '='.
906 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
907 Left.Parent->is(tok::r_paren))
908 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
909 Right.isNot(tok::equal);
910
911 // We only break before r_brace if there was a corresponding break before
912 // the l_brace, which is tracked by BreakBeforeClosingBrace.
913 if (Right.is(tok::r_brace))
914 return false;
915
916 if (Right.is(tok::r_paren) || Right.is(tok::greater))
917 return false;
918 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
919 Left.is(tok::comma) || Right.is(tok::lessless) ||
920 Right.is(tok::arrow) || Right.is(tok::period) ||
921 Right.is(tok::colon) || Left.is(tok::coloncolon) ||
922 Left.is(tok::semi) || Left.is(tok::l_brace) ||
923 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
924 Right.is(tok::identifier)) ||
925 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
926 (Left.is(tok::l_square) && !Right.is(tok::r_square));
927}
928
929} // namespace format
930} // namespace clang