blob: a90134115f7b4c8c85fcaac7600c9a67d32c0343 [file] [log] [blame]
Daniel Jasper32d28ee2013-01-29 21:01:14 +00001//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
Nico Weberee0feec2013-02-05 16:21:00 +000023static bool isUnaryOperator(const AnnotatedToken &Tok) {
24 switch (Tok.FormatTok.Tok.getKind()) {
25 case tok::plus:
26 case tok::plusplus:
27 case tok::minus:
28 case tok::minusminus:
29 case tok::exclaim:
30 case tok::tilde:
31 case tok::kw_sizeof:
32 case tok::kw_alignof:
33 return true;
34 default:
35 return false;
36 }
37}
38
Daniel Jasper32d28ee2013-01-29 21:01:14 +000039static bool isBinaryOperator(const AnnotatedToken &Tok) {
40 // Comma is a binary operator, but does not behave as such wrt. formatting.
41 return getPrecedence(Tok) > prec::Comma;
42}
43
Daniel Jasper01786732013-02-04 07:21:18 +000044// Returns the previous token ignoring comments.
45static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
46 const AnnotatedToken *PrevToken = Tok.Parent;
47 while (PrevToken != NULL && PrevToken->is(tok::comment))
48 PrevToken = PrevToken->Parent;
49 return PrevToken;
50}
51
52// Returns the next token ignoring comments.
53static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
54 if (Tok.Children.empty())
55 return NULL;
56 const AnnotatedToken *NextToken = &Tok.Children[0];
57 while (NextToken->is(tok::comment)) {
58 if (NextToken->Children.empty())
59 return NULL;
60 NextToken = &NextToken->Children[0];
61 }
62 return NextToken;
63}
64
Daniel Jasper32d28ee2013-01-29 21:01:14 +000065/// \brief A parser that gathers additional information about tokens.
66///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// to store parenthesis levels. It also tries to resolve matching "<" and ">"
/// into template parameter lists.
70class AnnotatingParser {
71public:
Daniel Jasper01786732013-02-04 07:21:18 +000072 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line)
73 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
74 KeywordVirtualFound(false), ColonIsObjCMethodExpr(false),
Daniel Jasper63d7ced2013-02-05 10:07:47 +000075 LongestObjCSelectorName(0), FirstObjCSelectorName(NULL),
Daniel Jasper01786732013-02-04 07:21:18 +000076 ColonIsForRangeExpr(false), IsExpression(false),
77 LookForFunctionName(Line.MustBeDeclaration), BindingStrength(1) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +000078 }
79
80 /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
81 struct ObjCSelectorRAII {
82 AnnotatingParser &P;
83 bool ColonWasObjCMethodExpr;
84
85 ObjCSelectorRAII(AnnotatingParser &P)
86 : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {
87 }
88
89 ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
90
91 void markStart(AnnotatedToken &Left) {
92 P.ColonIsObjCMethodExpr = true;
Daniel Jasper63d7ced2013-02-05 10:07:47 +000093 P.LongestObjCSelectorName = 0;
94 P.FirstObjCSelectorName = NULL;
Daniel Jasper32d28ee2013-01-29 21:01:14 +000095 Left.Type = TT_ObjCMethodExpr;
96 }
97
98 void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
99 };
100
Daniel Jasper01786732013-02-04 07:21:18 +0000101 struct ScopedBindingStrengthIncrease {
102 AnnotatingParser &P;
103 unsigned Increase;
104
105 ScopedBindingStrengthIncrease(AnnotatingParser &P, unsigned Increase)
106 : P(P), Increase(Increase) {
107 P.BindingStrength += Increase;
108 }
109
110 ~ScopedBindingStrengthIncrease() { P.BindingStrength -= Increase; }
111 };
112
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000113 bool parseAngle() {
114 if (CurrentToken == NULL)
115 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000116 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000117 AnnotatedToken *Left = CurrentToken->Parent;
118 while (CurrentToken != NULL) {
119 if (CurrentToken->is(tok::greater)) {
120 Left->MatchingParen = CurrentToken;
121 CurrentToken->MatchingParen = Left;
122 CurrentToken->Type = TT_TemplateCloser;
123 next();
124 return true;
125 }
126 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
127 CurrentToken->is(tok::r_brace))
128 return false;
129 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
130 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
131 return false;
132 if (CurrentToken->is(tok::comma))
133 ++Left->ParameterCount;
134 if (!consumeToken())
135 return false;
136 }
137 return false;
138 }
139
140 bool parseParens(bool LookForDecls = false) {
141 if (CurrentToken == NULL)
142 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000143 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000144 bool StartsObjCMethodExpr = false;
145 AnnotatedToken *Left = CurrentToken->Parent;
146 if (CurrentToken->is(tok::caret)) {
147 // ^( starts a block.
148 Left->Type = TT_ObjCBlockLParen;
149 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
150 // @selector( starts a selector.
151 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
152 MaybeSel->Parent->is(tok::at)) {
153 StartsObjCMethodExpr = true;
154 }
155 }
156
157 ObjCSelectorRAII objCSelector(*this);
158 if (StartsObjCMethodExpr)
159 objCSelector.markStart(*Left);
160
161 while (CurrentToken != NULL) {
162 // LookForDecls is set when "if (" has been seen. Check for
163 // 'identifier' '*' 'identifier' followed by not '=' -- this
164 // '*' has to be a binary operator but determineStarAmpUsage() will
165 // categorize it as an unary operator, so set the right type here.
166 if (LookForDecls && !CurrentToken->Children.empty()) {
167 AnnotatedToken &Prev = *CurrentToken->Parent;
168 AnnotatedToken &Next = CurrentToken->Children[0];
169 if (Prev.Parent->is(tok::identifier) &&
170 (Prev.is(tok::star) || Prev.is(tok::amp)) &&
171 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
172 Prev.Type = TT_BinaryOperator;
173 LookForDecls = false;
174 }
175 }
176
177 if (CurrentToken->is(tok::r_paren)) {
178 Left->MatchingParen = CurrentToken;
179 CurrentToken->MatchingParen = Left;
180
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000181 if (StartsObjCMethodExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000182 objCSelector.markEnd(*CurrentToken);
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000183 if (FirstObjCSelectorName != NULL) {
184 FirstObjCSelectorName->LongestObjCSelectorName =
185 LongestObjCSelectorName;
186 }
187 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000188
189 next();
190 return true;
191 }
192 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
193 return false;
194 if (CurrentToken->is(tok::comma))
195 ++Left->ParameterCount;
196 if (!consumeToken())
197 return false;
198 }
199 return false;
200 }
201
202 bool parseSquare() {
203 if (!CurrentToken)
204 return false;
Daniel Jasper01786732013-02-04 07:21:18 +0000205 ScopedBindingStrengthIncrease Increase(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000206
207 // A '[' could be an index subscript (after an indentifier or after
208 // ')' or ']'), or it could be the start of an Objective-C method
209 // expression.
210 AnnotatedToken *Left = CurrentToken->Parent;
211 bool StartsObjCMethodExpr =
212 !Left->Parent || Left->Parent->is(tok::colon) ||
213 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
214 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
Nico Weberee0feec2013-02-05 16:21:00 +0000215 isUnaryOperator(*Left->Parent) ||
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000216 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
217 prec::Unknown;
218
219 ObjCSelectorRAII objCSelector(*this);
220 if (StartsObjCMethodExpr)
221 objCSelector.markStart(*Left);
222
223 while (CurrentToken != NULL) {
224 if (CurrentToken->is(tok::r_square)) {
225 if (!CurrentToken->Children.empty() &&
226 CurrentToken->Children[0].is(tok::l_paren)) {
227 // An ObjC method call can't be followed by an open parenthesis.
228 // FIXME: Do we incorrectly label ":" with this?
229 StartsObjCMethodExpr = false;
230 Left->Type = TT_Unknown;
231 }
Daniel Jasper01786732013-02-04 07:21:18 +0000232 if (StartsObjCMethodExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000233 objCSelector.markEnd(*CurrentToken);
Daniel Jasper01786732013-02-04 07:21:18 +0000234 if (Left->Parent != NULL &&
235 (Left->Parent->is(tok::star) || Left->Parent->is(tok::amp)))
236 Left->Parent->Type = TT_BinaryOperator;
237 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000238 Left->MatchingParen = CurrentToken;
239 CurrentToken->MatchingParen = Left;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000240 if (FirstObjCSelectorName != NULL)
241 FirstObjCSelectorName->LongestObjCSelectorName =
242 LongestObjCSelectorName;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000243 next();
244 return true;
245 }
246 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
247 return false;
248 if (CurrentToken->is(tok::comma))
249 ++Left->ParameterCount;
250 if (!consumeToken())
251 return false;
252 }
253 return false;
254 }
255
256 bool parseBrace() {
257 // Lines are fine to end with '{'.
258 if (CurrentToken == NULL)
259 return true;
Daniel Jasper01786732013-02-04 07:21:18 +0000260 ScopedBindingStrengthIncrease Increase(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000261 AnnotatedToken *Left = CurrentToken->Parent;
262 while (CurrentToken != NULL) {
263 if (CurrentToken->is(tok::r_brace)) {
264 Left->MatchingParen = CurrentToken;
265 CurrentToken->MatchingParen = Left;
266 next();
267 return true;
268 }
269 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
270 return false;
Daniel Jasperf343cab2013-01-31 14:59:26 +0000271 if (CurrentToken->is(tok::comma))
272 ++Left->ParameterCount;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000273 if (!consumeToken())
274 return false;
275 }
276 return true;
277 }
278
279 bool parseConditional() {
280 while (CurrentToken != NULL) {
281 if (CurrentToken->is(tok::colon)) {
282 CurrentToken->Type = TT_ConditionalExpr;
283 next();
284 return true;
285 }
286 if (!consumeToken())
287 return false;
288 }
289 return false;
290 }
291
292 bool parseTemplateDeclaration() {
293 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
294 CurrentToken->Type = TT_TemplateOpener;
295 next();
296 if (!parseAngle())
297 return false;
298 CurrentToken->Parent->ClosesTemplateDeclaration = true;
299 return true;
300 }
301 return false;
302 }
303
304 bool consumeToken() {
305 AnnotatedToken *Tok = CurrentToken;
306 next();
307 switch (Tok->FormatTok.Tok.getKind()) {
308 case tok::plus:
309 case tok::minus:
310 // At the start of the line, +/- specific ObjectiveC method
311 // declarations.
312 if (Tok->Parent == NULL)
313 Tok->Type = TT_ObjCMethodSpecifier;
314 break;
315 case tok::colon:
316 // Colons from ?: are handled in parseConditional().
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000317 if (Tok->Parent->is(tok::r_paren)) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000318 Tok->Type = TT_CtorInitializerColon;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000319 } else if (ColonIsObjCMethodExpr ||
320 Line.First.Type == TT_ObjCMethodSpecifier) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000321 Tok->Type = TT_ObjCMethodExpr;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000322 Tok->Parent->Type = TT_ObjCSelectorName;
323 if (Tok->Parent->FormatTok.TokenLength > LongestObjCSelectorName)
324 LongestObjCSelectorName = Tok->Parent->FormatTok.TokenLength;
325 if (FirstObjCSelectorName == NULL)
326 FirstObjCSelectorName = Tok->Parent;
327 } else if (ColonIsForRangeExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000328 Tok->Type = TT_RangeBasedForLoopColon;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000329 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000330 break;
331 case tok::kw_if:
332 case tok::kw_while:
333 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
334 next();
335 if (!parseParens(/*LookForDecls=*/ true))
336 return false;
337 }
338 break;
339 case tok::kw_for:
340 ColonIsForRangeExpr = true;
341 next();
342 if (!parseParens())
343 return false;
344 break;
345 case tok::l_paren:
346 if (!parseParens())
347 return false;
348 break;
349 case tok::l_square:
350 if (!parseSquare())
351 return false;
352 break;
353 case tok::l_brace:
354 if (!parseBrace())
355 return false;
356 break;
357 case tok::less:
358 if (parseAngle())
359 Tok->Type = TT_TemplateOpener;
360 else {
361 Tok->Type = TT_BinaryOperator;
362 CurrentToken = Tok;
363 next();
364 }
365 break;
366 case tok::r_paren:
367 case tok::r_square:
368 return false;
369 case tok::r_brace:
370 // Lines can start with '}'.
371 if (Tok->Parent != NULL)
372 return false;
373 break;
374 case tok::greater:
375 Tok->Type = TT_BinaryOperator;
376 break;
377 case tok::kw_operator:
378 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
379 CurrentToken->Type = TT_OverloadedOperator;
380 next();
381 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
382 CurrentToken->Type = TT_OverloadedOperator;
383 next();
384 }
385 } else {
386 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
387 CurrentToken->Type = TT_OverloadedOperator;
388 next();
389 }
390 }
391 break;
392 case tok::question:
393 parseConditional();
394 break;
395 case tok::kw_template:
396 parseTemplateDeclaration();
397 break;
398 default:
399 break;
400 }
401 return true;
402 }
403
404 void parseIncludeDirective() {
405 next();
406 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
407 next();
408 while (CurrentToken != NULL) {
409 if (CurrentToken->isNot(tok::comment) ||
410 !CurrentToken->Children.empty())
411 CurrentToken->Type = TT_ImplicitStringLiteral;
412 next();
413 }
414 } else {
415 while (CurrentToken != NULL) {
416 next();
417 }
418 }
419 }
420
421 void parseWarningOrError() {
422 next();
423 // We still want to format the whitespace left of the first token of the
424 // warning or error.
425 next();
426 while (CurrentToken != NULL) {
427 CurrentToken->Type = TT_ImplicitStringLiteral;
428 next();
429 }
430 }
431
432 void parsePreprocessorDirective() {
433 next();
434 if (CurrentToken == NULL)
435 return;
436 // Hashes in the middle of a line can lead to any strange token
437 // sequence.
438 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
439 return;
440 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
441 case tok::pp_include:
442 case tok::pp_import:
443 parseIncludeDirective();
444 break;
445 case tok::pp_error:
446 case tok::pp_warning:
447 parseWarningOrError();
448 break;
449 default:
450 break;
451 }
Daniel Jasper5b7e7b02013-02-05 09:34:14 +0000452 while (CurrentToken != NULL)
453 next();
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000454 }
455
456 LineType parseLine() {
457 int PeriodsAndArrows = 0;
458 bool CanBeBuilderTypeStmt = true;
459 if (CurrentToken->is(tok::hash)) {
460 parsePreprocessorDirective();
461 return LT_PreprocessorDirective;
462 }
463 while (CurrentToken != NULL) {
464 if (CurrentToken->is(tok::kw_virtual))
465 KeywordVirtualFound = true;
466 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
467 ++PeriodsAndArrows;
468 if (getPrecedence(*CurrentToken) > prec::Assignment &&
469 CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
470 CanBeBuilderTypeStmt = false;
471 if (!consumeToken())
472 return LT_Invalid;
473 }
474 if (KeywordVirtualFound)
475 return LT_VirtualFunctionDecl;
476
477 // Assume a builder-type call if there are 2 or more "." and "->".
478 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
479 return LT_BuilderTypeCall;
480
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000481 if (Line.First.Type == TT_ObjCMethodSpecifier) {
482 if (FirstObjCSelectorName != NULL)
483 FirstObjCSelectorName->LongestObjCSelectorName =
484 LongestObjCSelectorName;
485 return LT_ObjCMethodDecl;
486 }
487
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000488 return LT_Other;
489 }
490
491 void next() {
Daniel Jasper01786732013-02-04 07:21:18 +0000492 if (CurrentToken != NULL) {
493 determineTokenType(*CurrentToken);
494 CurrentToken->BindingStrength = BindingStrength;
495 }
496
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000497 if (CurrentToken != NULL && !CurrentToken->Children.empty())
498 CurrentToken = &CurrentToken->Children[0];
499 else
500 CurrentToken = NULL;
501 }
502
503private:
Daniel Jasper01786732013-02-04 07:21:18 +0000504 SourceManager &SourceMgr;
505 Lexer &Lex;
506 AnnotatedLine &Line;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000507 AnnotatedToken *CurrentToken;
508 bool KeywordVirtualFound;
509 bool ColonIsObjCMethodExpr;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000510 unsigned LongestObjCSelectorName;
511 AnnotatedToken *FirstObjCSelectorName;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000512 bool ColonIsForRangeExpr;
Daniel Jasper01786732013-02-04 07:21:18 +0000513 bool IsExpression;
514 bool LookForFunctionName;
515
516 unsigned BindingStrength;
517
518 void determineTokenType(AnnotatedToken &Current) {
519 if (getPrecedence(Current) == prec::Assignment) {
520 IsExpression = true;
521 AnnotatedToken *Previous = Current.Parent;
522 while (Previous != NULL) {
523 if (Previous->Type == TT_BinaryOperator &&
524 (Previous->is(tok::star) || Previous->is(tok::amp))) {
525 Previous->Type = TT_PointerOrReference;
526 }
527 Previous = Previous->Parent;
528 }
529 }
530 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
531 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
532 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
533 IsExpression = true;
534
535 if (Current.Type == TT_Unknown) {
536 if (LookForFunctionName && Current.is(tok::l_paren)) {
537 findFunctionName(&Current);
538 LookForFunctionName = false;
539 } else if (Current.is(tok::star) || Current.is(tok::amp)) {
540 Current.Type = determineStarAmpUsage(Current, IsExpression);
541 } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
542 Current.is(tok::caret)) {
543 Current.Type = determinePlusMinusCaretUsage(Current);
544 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
545 Current.Type = determineIncrementUsage(Current);
546 } else if (Current.is(tok::exclaim)) {
547 Current.Type = TT_UnaryOperator;
548 } else if (isBinaryOperator(Current)) {
549 Current.Type = TT_BinaryOperator;
550 } else if (Current.is(tok::comment)) {
551 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
552 Lex.getLangOpts()));
553 if (StringRef(Data).startswith("//"))
554 Current.Type = TT_LineComment;
555 else
556 Current.Type = TT_BlockComment;
557 } else if (Current.is(tok::r_paren) &&
558 (Current.Parent->Type == TT_PointerOrReference ||
559 Current.Parent->Type == TT_TemplateCloser) &&
560 (Current.Children.empty() ||
561 (Current.Children[0].isNot(tok::equal) &&
562 Current.Children[0].isNot(tok::semi) &&
563 Current.Children[0].isNot(tok::l_brace)))) {
564 // FIXME: We need to get smarter and understand more cases of casts.
565 Current.Type = TT_CastRParen;
566 } else if (Current.is(tok::at) && Current.Children.size()) {
567 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
568 case tok::objc_interface:
569 case tok::objc_implementation:
570 case tok::objc_protocol:
571 Current.Type = TT_ObjCDecl;
572 break;
573 case tok::objc_property:
574 Current.Type = TT_ObjCProperty;
575 break;
576 default:
577 break;
578 }
579 }
580 }
581 }
582
583 /// \brief Starting from \p Current, this searches backwards for an
584 /// identifier which could be the start of a function name and marks it.
585 void findFunctionName(AnnotatedToken *Current) {
586 AnnotatedToken *Parent = Current->Parent;
587 while (Parent != NULL && Parent->Parent != NULL) {
588 if (Parent->is(tok::identifier) &&
589 (Parent->Parent->is(tok::identifier) ||
590 Parent->Parent->Type == TT_PointerOrReference ||
591 Parent->Parent->Type == TT_TemplateCloser)) {
592 Parent->Type = TT_StartOfName;
593 break;
594 }
595 Parent = Parent->Parent;
596 }
597 }
598
599 /// \brief Return the type of the given token assuming it is * or &.
600 TokenType
601 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
602 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
603 if (PrevToken == NULL)
604 return TT_UnaryOperator;
605
606 const AnnotatedToken *NextToken = getNextToken(Tok);
607 if (NextToken == NULL)
608 return TT_Unknown;
609
610 if (NextToken->is(tok::l_square))
611 return TT_PointerOrReference;
612
613 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
614 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
615 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
616 PrevToken->Type == TT_BinaryOperator ||
617 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
618 return TT_UnaryOperator;
619
620 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
621 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
Nico Weberee0feec2013-02-05 16:21:00 +0000622 isUnaryOperator(*NextToken) || NextToken->is(tok::l_paren) ||
623 NextToken->is(tok::l_square))
Daniel Jasper01786732013-02-04 07:21:18 +0000624 return TT_BinaryOperator;
625
626 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
627 NextToken->is(tok::greater))
628 return TT_PointerOrReference;
629
630 // It is very unlikely that we are going to find a pointer or reference type
631 // definition on the RHS of an assignment.
632 if (IsExpression)
633 return TT_BinaryOperator;
634
635 return TT_PointerOrReference;
636 }
637
638 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
639 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
640 if (PrevToken == NULL)
641 return TT_UnaryOperator;
642
643 // Use heuristics to recognize unary operators.
644 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
645 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
646 PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
647 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
648 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
649 return TT_UnaryOperator;
650
Nico Weberee0feec2013-02-05 16:21:00 +0000651 // There can't be two consecutive binary operators.
Daniel Jasper01786732013-02-04 07:21:18 +0000652 if (PrevToken->Type == TT_BinaryOperator)
653 return TT_UnaryOperator;
654
655 // Fall back to marking the token as binary operator.
656 return TT_BinaryOperator;
657 }
658
659 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
660 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
661 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
662 if (PrevToken == NULL)
663 return TT_UnaryOperator;
664 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
665 PrevToken->is(tok::identifier))
666 return TT_TrailingUnaryOperator;
667
668 return TT_UnaryOperator;
669 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000670};
671
672void TokenAnnotator::annotate() {
Daniel Jasper01786732013-02-04 07:21:18 +0000673 AnnotatingParser Parser(SourceMgr, Lex, Line);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000674 Line.Type = Parser.parseLine();
675 if (Line.Type == LT_Invalid)
676 return;
677
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000678 if (Line.First.Type == TT_ObjCMethodSpecifier)
679 Line.Type = LT_ObjCMethodDecl;
680 else if (Line.First.Type == TT_ObjCDecl)
681 Line.Type = LT_ObjCDecl;
682 else if (Line.First.Type == TT_ObjCProperty)
683 Line.Type = LT_ObjCProperty;
684
685 Line.First.SpaceRequiredBefore = true;
686 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
687 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
688
689 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
690 if (!Line.First.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000691 calculateFormattingInformation(Line.First.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000692}
693
Daniel Jasper01786732013-02-04 07:21:18 +0000694void TokenAnnotator::calculateFormattingInformation(AnnotatedToken &Current) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000695 Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
696
697 if (Current.FormatTok.MustBreakBefore) {
698 Current.MustBreakBefore = true;
Daniel Jasper2752ff32013-02-04 07:32:14 +0000699 } else if (Current.Type == TT_LineComment) {
700 Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
701 } else if ((Current.Parent->is(tok::comment) &&
702 Current.FormatTok.NewlinesBefore > 0) ||
703 (Current.is(tok::string_literal) &&
704 Current.Parent->is(tok::string_literal))) {
705 Current.MustBreakBefore = true;
Daniel Jasperfa543ac2013-02-04 07:34:48 +0000706 } else if (Current.is(tok::lessless) && !Current.Children.empty() &&
707 Current.Parent->is(tok::string_literal) &&
708 Current.Children[0].is(tok::string_literal)) {
709 Current.MustBreakBefore = true;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000710 } else {
Daniel Jasper2752ff32013-02-04 07:32:14 +0000711 Current.MustBreakBefore = false;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000712 }
713 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
714 if (Current.MustBreakBefore)
715 Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
716 else
717 Current.TotalLength =
718 Current.Parent->TotalLength + Current.FormatTok.TokenLength +
719 (Current.SpaceRequiredBefore ? 1 : 0);
720 // FIXME: Only calculate this if CanBreakBefore is true once static
721 // initializers etc. are sorted out.
Daniel Jasper01786732013-02-04 07:21:18 +0000722 // FIXME: Move magic numbers to a better place.
723 Current.SplitPenalty = 20 * Current.BindingStrength + splitPenalty(Current);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000724 if (!Current.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000725 calculateFormattingInformation(Current.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000726}
727
728unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
729 const AnnotatedToken &Left = *Tok.Parent;
730 const AnnotatedToken &Right = Tok;
731
732 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
733 return 50;
734 if (Left.is(tok::equal) && Right.is(tok::l_brace))
735 return 150;
736 if (Left.is(tok::coloncolon))
737 return 500;
738
739 if (Left.Type == TT_RangeBasedForLoopColon)
740 return 5;
741
742 if (Right.is(tok::arrow) || Right.is(tok::period)) {
743 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
744 return 5; // Should be smaller than breaking at a nested comma.
745 return 150;
746 }
747
748 // In for-loops, prefer breaking at ',' and ';'.
749 if (Line.First.is(tok::kw_for) &&
750 (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
751 return 20;
752
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000753 if (Left.is(tok::semi))
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000754 return 0;
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000755 if (Left.is(tok::comma))
756 return 1;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000757
758 // In Objective-C method expressions, prefer breaking before "param:" over
759 // breaking after it.
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000760 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000761 return 0;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000762 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000763 return 20;
764
Daniel Jasper01786732013-02-04 07:21:18 +0000765 if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
766 Left.Type == TT_TemplateOpener)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000767 return 20;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000768
Daniel Jasper01786732013-02-04 07:21:18 +0000769 if (Right.is(tok::lessless))
770 return prec::Shift;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000771 if (Left.Type == TT_ConditionalExpr)
772 return prec::Assignment;
773 prec::Level Level = getPrecedence(Left);
774
775 if (Level != prec::Unknown)
776 return Level;
777
778 return 3;
779}
780
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000781bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
782 const AnnotatedToken &Right) {
783 if (Right.is(tok::hashhash))
784 return Left.is(tok::hash);
785 if (Left.is(tok::hashhash) || Left.is(tok::hash))
786 return Right.is(tok::hash);
787 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
788 return false;
789 if (Right.is(tok::less) &&
790 (Left.is(tok::kw_template) ||
791 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
792 return true;
793 if (Left.is(tok::arrow) || Right.is(tok::arrow))
794 return false;
795 if (Left.is(tok::exclaim) || Left.is(tok::tilde))
796 return false;
797 if (Left.is(tok::at) &&
798 (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
799 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
800 Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
801 Right.is(tok::kw_true) || Right.is(tok::kw_false)))
802 return false;
803 if (Left.is(tok::coloncolon))
804 return false;
805 if (Right.is(tok::coloncolon))
806 return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
807 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
808 return false;
809 if (Right.is(tok::amp) || Right.is(tok::star))
810 return Left.FormatTok.Tok.isLiteral() ||
811 (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
812 !Style.PointerAndReferenceBindToType);
813 if (Left.is(tok::amp) || Left.is(tok::star))
814 return Right.FormatTok.Tok.isLiteral() ||
815 Style.PointerAndReferenceBindToType;
816 if (Right.is(tok::star) && Left.is(tok::l_paren))
817 return false;
818 if (Left.is(tok::l_square) || Right.is(tok::r_square))
819 return false;
820 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
821 return false;
822 if (Left.is(tok::period) || Right.is(tok::period))
823 return false;
824 if (Left.is(tok::colon))
825 return Left.Type != TT_ObjCMethodExpr;
826 if (Right.is(tok::colon))
827 return Right.Type != TT_ObjCMethodExpr;
828 if (Left.is(tok::l_paren))
829 return false;
830 if (Right.is(tok::l_paren)) {
831 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
832 Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
833 Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
834 Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
835 Left.is(tok::kw_delete);
836 }
837 if (Left.is(tok::at) &&
838 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
839 return false;
840 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
841 return false;
842 return true;
843}
844
845bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
846 if (Line.Type == LT_ObjCMethodDecl) {
847 if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
848 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
849 return true;
850 if (Tok.is(tok::colon))
851 return false;
852 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
853 return true;
854 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
855 // Don't space between ')' and <id>
856 return false;
857 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
858 // Don't space between ':' and '('
859 return false;
860 }
861 if (Line.Type == LT_ObjCProperty &&
862 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
863 return false;
864
865 if (Tok.Parent->is(tok::comma))
866 return true;
867 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
868 return true;
869 if (Tok.Type == TT_OverloadedOperator)
870 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
871 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
872 if (Tok.Parent->Type == TT_OverloadedOperator)
873 return false;
874 if (Tok.is(tok::colon))
875 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
876 Tok.Type != TT_ObjCMethodExpr;
877 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
878 return false;
879 if (Tok.Type == TT_UnaryOperator)
880 return Tok.Parent->isNot(tok::l_paren) &&
881 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
882 (Tok.Parent->isNot(tok::colon) ||
883 Tok.Parent->Type != TT_ObjCMethodExpr);
884 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
885 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
886 TT_TemplateCloser && Style.SplitTemplateClosingGreater;
887 }
888 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
889 return true;
890 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
891 return false;
892 if (Tok.is(tok::less) && Line.First.is(tok::hash))
893 return true;
894 if (Tok.Type == TT_TrailingUnaryOperator)
895 return false;
896 return spaceRequiredBetween(*Tok.Parent, Tok);
897}
898
899bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
900 const AnnotatedToken &Left = *Right.Parent;
901 if (Line.Type == LT_ObjCMethodDecl) {
902 if (Right.is(tok::identifier) && !Right.Children.empty() &&
903 Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
904 return true;
905 if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
906 Left.Parent->is(tok::colon))
907 // Don't break this identifier as ':' or identifier
908 // before it will break.
909 return false;
910 if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
911 // Don't break at ':' if identifier before it can beak.
912 return false;
913 }
914 if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
915 return true;
916 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
917 return false;
918 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
919 return true;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000920 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000921 return true;
922 if (Left.ClosesTemplateDeclaration)
923 return true;
924 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
925 return true;
926 if (Left.Type == TT_RangeBasedForLoopColon)
927 return true;
928 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
929 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
930 Left.is(tok::question))
931 return false;
932 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
933 return false;
934
935 if (Right.Type == TT_LineComment)
936 // We rely on MustBreakBefore being set correctly here as we should not
937 // change the "binding" behavior of a comment.
938 return false;
939
940 // Allow breaking after a trailing 'const', e.g. after a method declaration,
941 // unless it is follow by ';', '{' or '='.
942 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
943 Left.Parent->is(tok::r_paren))
944 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
945 Right.isNot(tok::equal);
946
947 // We only break before r_brace if there was a corresponding break before
948 // the l_brace, which is tracked by BreakBeforeClosingBrace.
949 if (Right.is(tok::r_brace))
950 return false;
951
952 if (Right.is(tok::r_paren) || Right.is(tok::greater))
953 return false;
954 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
955 Left.is(tok::comma) || Right.is(tok::lessless) ||
956 Right.is(tok::arrow) || Right.is(tok::period) ||
957 Right.is(tok::colon) || Left.is(tok::coloncolon) ||
958 Left.is(tok::semi) || Left.is(tok::l_brace) ||
959 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
960 Right.is(tok::identifier)) ||
961 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
962 (Left.is(tok::l_square) && !Right.is(tok::r_square));
963}
964
965} // namespace format
966} // namespace clang