blob: ca8d595df76b09951b34dae4c959a42ec3982c53 [file] [log] [blame]
Daniel Jasper32d28ee2013-01-29 21:01:14 +00001//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
Nico Weberee0feec2013-02-05 16:21:00 +000023static bool isUnaryOperator(const AnnotatedToken &Tok) {
24 switch (Tok.FormatTok.Tok.getKind()) {
25 case tok::plus:
26 case tok::plusplus:
27 case tok::minus:
28 case tok::minusminus:
29 case tok::exclaim:
30 case tok::tilde:
31 case tok::kw_sizeof:
32 case tok::kw_alignof:
33 return true;
34 default:
35 return false;
36 }
37}
38
Daniel Jasper32d28ee2013-01-29 21:01:14 +000039static bool isBinaryOperator(const AnnotatedToken &Tok) {
40 // Comma is a binary operator, but does not behave as such wrt. formatting.
41 return getPrecedence(Tok) > prec::Comma;
42}
43
Daniel Jasper01786732013-02-04 07:21:18 +000044// Returns the previous token ignoring comments.
45static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
46 const AnnotatedToken *PrevToken = Tok.Parent;
47 while (PrevToken != NULL && PrevToken->is(tok::comment))
48 PrevToken = PrevToken->Parent;
49 return PrevToken;
50}
51
52// Returns the next token ignoring comments.
53static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
54 if (Tok.Children.empty())
55 return NULL;
56 const AnnotatedToken *NextToken = &Tok.Children[0];
57 while (NextToken->is(tok::comment)) {
58 if (NextToken->Children.empty())
59 return NULL;
60 NextToken = &NextToken->Children[0];
61 }
62 return NextToken;
63}
64
/// \brief A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// stores the parenthesis levels. It also tries to resolve matching "<" and
/// ">" into template parameter lists.
70class AnnotatingParser {
71public:
Daniel Jasper01786732013-02-04 07:21:18 +000072 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line)
73 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
Daniel Jasper4e778092013-02-06 10:05:46 +000074 KeywordVirtualFound(false) {
75 Contexts.push_back(Context(1, /*IsExpression=*/ false));
76 Contexts.back().LookForFunctionName = Line.MustBeDeclaration;
Daniel Jasper32d28ee2013-01-29 21:01:14 +000077 }
78
Daniel Jasper32d28ee2013-01-29 21:01:14 +000079 bool parseAngle() {
80 if (CurrentToken == NULL)
81 return false;
Daniel Jasper4e778092013-02-06 10:05:46 +000082 ScopedContextCreator ContextCreator(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +000083 AnnotatedToken *Left = CurrentToken->Parent;
Daniel Jasper4e778092013-02-06 10:05:46 +000084 Contexts.back().IsExpression = false;
Daniel Jasper32d28ee2013-01-29 21:01:14 +000085 while (CurrentToken != NULL) {
86 if (CurrentToken->is(tok::greater)) {
87 Left->MatchingParen = CurrentToken;
88 CurrentToken->MatchingParen = Left;
89 CurrentToken->Type = TT_TemplateCloser;
90 next();
91 return true;
92 }
93 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
94 CurrentToken->is(tok::r_brace))
95 return false;
96 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
97 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
98 return false;
99 if (CurrentToken->is(tok::comma))
100 ++Left->ParameterCount;
101 if (!consumeToken())
102 return false;
103 }
104 return false;
105 }
106
107 bool parseParens(bool LookForDecls = false) {
108 if (CurrentToken == NULL)
109 return false;
Daniel Jasper4e778092013-02-06 10:05:46 +0000110 ScopedContextCreator ContextCreator(*this, 1);
111
112 // FIXME: This is a bit of a hack. Do better.
113 Contexts.back().ColonIsForRangeExpr =
114 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
115
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000116 bool StartsObjCMethodExpr = false;
117 AnnotatedToken *Left = CurrentToken->Parent;
118 if (CurrentToken->is(tok::caret)) {
119 // ^( starts a block.
120 Left->Type = TT_ObjCBlockLParen;
121 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
122 // @selector( starts a selector.
123 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
124 MaybeSel->Parent->is(tok::at)) {
125 StartsObjCMethodExpr = true;
126 }
127 }
128
Daniel Jasper4e778092013-02-06 10:05:46 +0000129 if (StartsObjCMethodExpr) {
130 Contexts.back().ColonIsObjCMethodExpr = true;
131 Left->Type = TT_ObjCMethodExpr;
132 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000133
134 while (CurrentToken != NULL) {
135 // LookForDecls is set when "if (" has been seen. Check for
136 // 'identifier' '*' 'identifier' followed by not '=' -- this
137 // '*' has to be a binary operator but determineStarAmpUsage() will
138 // categorize it as an unary operator, so set the right type here.
139 if (LookForDecls && !CurrentToken->Children.empty()) {
140 AnnotatedToken &Prev = *CurrentToken->Parent;
141 AnnotatedToken &Next = CurrentToken->Children[0];
142 if (Prev.Parent->is(tok::identifier) &&
143 (Prev.is(tok::star) || Prev.is(tok::amp)) &&
144 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
145 Prev.Type = TT_BinaryOperator;
146 LookForDecls = false;
147 }
148 }
149
150 if (CurrentToken->is(tok::r_paren)) {
151 Left->MatchingParen = CurrentToken;
152 CurrentToken->MatchingParen = Left;
153
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000154 if (StartsObjCMethodExpr) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000155 CurrentToken->Type = TT_ObjCMethodExpr;
156 if (Contexts.back().FirstObjCSelectorName != NULL) {
157 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
158 Contexts.back().LongestObjCSelectorName;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000159 }
160 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000161
162 next();
163 return true;
164 }
165 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
166 return false;
167 if (CurrentToken->is(tok::comma))
168 ++Left->ParameterCount;
169 if (!consumeToken())
170 return false;
171 }
172 return false;
173 }
174
175 bool parseSquare() {
176 if (!CurrentToken)
177 return false;
Daniel Jasper4e778092013-02-06 10:05:46 +0000178 ScopedContextCreator ContextCreator(*this, 10);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000179
180 // A '[' could be an index subscript (after an indentifier or after
181 // ')' or ']'), or it could be the start of an Objective-C method
182 // expression.
183 AnnotatedToken *Left = CurrentToken->Parent;
184 bool StartsObjCMethodExpr =
185 !Left->Parent || Left->Parent->is(tok::colon) ||
186 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
187 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
Nico Weberee0feec2013-02-05 16:21:00 +0000188 isUnaryOperator(*Left->Parent) ||
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000189 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
190 prec::Unknown;
191
Daniel Jasper4e778092013-02-06 10:05:46 +0000192 if (StartsObjCMethodExpr) {
193 Contexts.back().ColonIsObjCMethodExpr = true;
194 Left->Type = TT_ObjCMethodExpr;
195 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000196
197 while (CurrentToken != NULL) {
198 if (CurrentToken->is(tok::r_square)) {
199 if (!CurrentToken->Children.empty() &&
200 CurrentToken->Children[0].is(tok::l_paren)) {
Nico Webere8a97982013-02-06 06:20:11 +0000201 // An ObjC method call is rarely followed by an open parenthesis.
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000202 // FIXME: Do we incorrectly label ":" with this?
203 StartsObjCMethodExpr = false;
204 Left->Type = TT_Unknown;
205 }
Daniel Jasper01786732013-02-04 07:21:18 +0000206 if (StartsObjCMethodExpr) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000207 CurrentToken->Type = TT_ObjCMethodExpr;
Nico Webere8a97982013-02-06 06:20:11 +0000208 // determineStarAmpUsage() thinks that '*' '[' is allocating an
209 // array of pointers, but if '[' starts a selector then '*' is a
210 // binary operator.
Daniel Jasper01786732013-02-04 07:21:18 +0000211 if (Left->Parent != NULL &&
Nico Webere8a97982013-02-06 06:20:11 +0000212 (Left->Parent->is(tok::star) || Left->Parent->is(tok::amp)) &&
213 Left->Parent->Type == TT_PointerOrReference)
Daniel Jasper01786732013-02-04 07:21:18 +0000214 Left->Parent->Type = TT_BinaryOperator;
215 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000216 Left->MatchingParen = CurrentToken;
217 CurrentToken->MatchingParen = Left;
Daniel Jasper4e778092013-02-06 10:05:46 +0000218 if (Contexts.back().FirstObjCSelectorName != NULL)
219 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
220 Contexts.back().LongestObjCSelectorName;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000221 next();
222 return true;
223 }
224 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
225 return false;
226 if (CurrentToken->is(tok::comma))
227 ++Left->ParameterCount;
228 if (!consumeToken())
229 return false;
230 }
231 return false;
232 }
233
234 bool parseBrace() {
235 // Lines are fine to end with '{'.
236 if (CurrentToken == NULL)
237 return true;
Daniel Jasper4e778092013-02-06 10:05:46 +0000238 ScopedContextCreator ContextCreator(*this, 1);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000239 AnnotatedToken *Left = CurrentToken->Parent;
240 while (CurrentToken != NULL) {
241 if (CurrentToken->is(tok::r_brace)) {
242 Left->MatchingParen = CurrentToken;
243 CurrentToken->MatchingParen = Left;
244 next();
245 return true;
246 }
247 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
248 return false;
Daniel Jasperf343cab2013-01-31 14:59:26 +0000249 if (CurrentToken->is(tok::comma))
250 ++Left->ParameterCount;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000251 if (!consumeToken())
252 return false;
253 }
254 return true;
255 }
256
257 bool parseConditional() {
258 while (CurrentToken != NULL) {
259 if (CurrentToken->is(tok::colon)) {
260 CurrentToken->Type = TT_ConditionalExpr;
261 next();
262 return true;
263 }
264 if (!consumeToken())
265 return false;
266 }
267 return false;
268 }
269
270 bool parseTemplateDeclaration() {
271 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
272 CurrentToken->Type = TT_TemplateOpener;
273 next();
274 if (!parseAngle())
275 return false;
276 CurrentToken->Parent->ClosesTemplateDeclaration = true;
277 return true;
278 }
279 return false;
280 }
281
282 bool consumeToken() {
283 AnnotatedToken *Tok = CurrentToken;
284 next();
285 switch (Tok->FormatTok.Tok.getKind()) {
286 case tok::plus:
287 case tok::minus:
288 // At the start of the line, +/- specific ObjectiveC method
289 // declarations.
290 if (Tok->Parent == NULL)
291 Tok->Type = TT_ObjCMethodSpecifier;
292 break;
293 case tok::colon:
294 // Colons from ?: are handled in parseConditional().
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000295 if (Tok->Parent->is(tok::r_paren)) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000296 Tok->Type = TT_CtorInitializerColon;
Daniel Jasper4e778092013-02-06 10:05:46 +0000297 } else if (Contexts.back().ColonIsObjCMethodExpr ||
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000298 Line.First.Type == TT_ObjCMethodSpecifier) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000299 Tok->Type = TT_ObjCMethodExpr;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000300 Tok->Parent->Type = TT_ObjCSelectorName;
Daniel Jasper4e778092013-02-06 10:05:46 +0000301 if (Tok->Parent->FormatTok.TokenLength >
302 Contexts.back().LongestObjCSelectorName)
303 Contexts.back().LongestObjCSelectorName =
304 Tok->Parent->FormatTok.TokenLength;
305 if (Contexts.back().FirstObjCSelectorName == NULL)
306 Contexts.back().FirstObjCSelectorName = Tok->Parent;
307 } else if (Contexts.back().ColonIsForRangeExpr) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000308 Tok->Type = TT_RangeBasedForLoopColon;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000309 }
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000310 break;
311 case tok::kw_if:
312 case tok::kw_while:
313 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
314 next();
315 if (!parseParens(/*LookForDecls=*/ true))
316 return false;
317 }
318 break;
319 case tok::kw_for:
Daniel Jasper4e778092013-02-06 10:05:46 +0000320 Contexts.back().ColonIsForRangeExpr = true;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000321 next();
322 if (!parseParens())
323 return false;
324 break;
325 case tok::l_paren:
326 if (!parseParens())
327 return false;
328 break;
329 case tok::l_square:
330 if (!parseSquare())
331 return false;
332 break;
333 case tok::l_brace:
334 if (!parseBrace())
335 return false;
336 break;
337 case tok::less:
338 if (parseAngle())
339 Tok->Type = TT_TemplateOpener;
340 else {
341 Tok->Type = TT_BinaryOperator;
342 CurrentToken = Tok;
343 next();
344 }
345 break;
346 case tok::r_paren:
347 case tok::r_square:
348 return false;
349 case tok::r_brace:
350 // Lines can start with '}'.
351 if (Tok->Parent != NULL)
352 return false;
353 break;
354 case tok::greater:
355 Tok->Type = TT_BinaryOperator;
356 break;
357 case tok::kw_operator:
358 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
359 CurrentToken->Type = TT_OverloadedOperator;
360 next();
361 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
362 CurrentToken->Type = TT_OverloadedOperator;
363 next();
364 }
365 } else {
366 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
367 CurrentToken->Type = TT_OverloadedOperator;
368 next();
369 }
370 }
371 break;
372 case tok::question:
373 parseConditional();
374 break;
375 case tok::kw_template:
376 parseTemplateDeclaration();
377 break;
378 default:
379 break;
380 }
381 return true;
382 }
383
384 void parseIncludeDirective() {
385 next();
386 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
387 next();
388 while (CurrentToken != NULL) {
389 if (CurrentToken->isNot(tok::comment) ||
390 !CurrentToken->Children.empty())
391 CurrentToken->Type = TT_ImplicitStringLiteral;
392 next();
393 }
394 } else {
395 while (CurrentToken != NULL) {
396 next();
397 }
398 }
399 }
400
401 void parseWarningOrError() {
402 next();
403 // We still want to format the whitespace left of the first token of the
404 // warning or error.
405 next();
406 while (CurrentToken != NULL) {
407 CurrentToken->Type = TT_ImplicitStringLiteral;
408 next();
409 }
410 }
411
412 void parsePreprocessorDirective() {
413 next();
414 if (CurrentToken == NULL)
415 return;
416 // Hashes in the middle of a line can lead to any strange token
417 // sequence.
418 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
419 return;
420 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
421 case tok::pp_include:
422 case tok::pp_import:
423 parseIncludeDirective();
424 break;
425 case tok::pp_error:
426 case tok::pp_warning:
427 parseWarningOrError();
428 break;
429 default:
430 break;
431 }
Daniel Jasper5b7e7b02013-02-05 09:34:14 +0000432 while (CurrentToken != NULL)
433 next();
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000434 }
435
436 LineType parseLine() {
437 int PeriodsAndArrows = 0;
438 bool CanBeBuilderTypeStmt = true;
439 if (CurrentToken->is(tok::hash)) {
440 parsePreprocessorDirective();
441 return LT_PreprocessorDirective;
442 }
443 while (CurrentToken != NULL) {
444 if (CurrentToken->is(tok::kw_virtual))
445 KeywordVirtualFound = true;
446 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
447 ++PeriodsAndArrows;
448 if (getPrecedence(*CurrentToken) > prec::Assignment &&
449 CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
450 CanBeBuilderTypeStmt = false;
451 if (!consumeToken())
452 return LT_Invalid;
453 }
454 if (KeywordVirtualFound)
455 return LT_VirtualFunctionDecl;
456
457 // Assume a builder-type call if there are 2 or more "." and "->".
458 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
459 return LT_BuilderTypeCall;
460
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000461 if (Line.First.Type == TT_ObjCMethodSpecifier) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000462 if (Contexts.back().FirstObjCSelectorName != NULL)
463 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
464 Contexts.back().LongestObjCSelectorName;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000465 return LT_ObjCMethodDecl;
466 }
467
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000468 return LT_Other;
469 }
470
471 void next() {
Daniel Jasper01786732013-02-04 07:21:18 +0000472 if (CurrentToken != NULL) {
473 determineTokenType(*CurrentToken);
Daniel Jasper4e778092013-02-06 10:05:46 +0000474 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
Daniel Jasper01786732013-02-04 07:21:18 +0000475 }
476
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000477 if (CurrentToken != NULL && !CurrentToken->Children.empty())
478 CurrentToken = &CurrentToken->Children[0];
479 else
480 CurrentToken = NULL;
481 }
482
483private:
Daniel Jasper4e778092013-02-06 10:05:46 +0000484 /// \brief A struct to hold information valid in a specific context, e.g.
485 /// a pair of parenthesis.
486 struct Context {
487 Context(unsigned BindingStrength, bool IsExpression)
488 : BindingStrength(BindingStrength), LongestObjCSelectorName(0),
489 ColonIsForRangeExpr(false), ColonIsObjCMethodExpr(false),
490 FirstObjCSelectorName(NULL), IsExpression(IsExpression),
491 LookForFunctionName(false) {
492 }
Daniel Jasper01786732013-02-04 07:21:18 +0000493
Daniel Jasper4e778092013-02-06 10:05:46 +0000494 unsigned BindingStrength;
495 unsigned LongestObjCSelectorName;
496 bool ColonIsForRangeExpr;
497 bool ColonIsObjCMethodExpr;
498 AnnotatedToken *FirstObjCSelectorName;
499 bool IsExpression;
500 bool LookForFunctionName;
501 };
502
503 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
504 /// of each instance.
505 struct ScopedContextCreator {
506 AnnotatingParser &P;
507
508 ScopedContextCreator(AnnotatingParser &P, unsigned Increase)
509 : P(P) {
510 P.Contexts.push_back(Context(
511 P.Contexts.back().BindingStrength + Increase,
512 P.Contexts.back().IsExpression));
513 }
514
515 ~ScopedContextCreator() { P.Contexts.pop_back(); }
516 };
Daniel Jasper01786732013-02-04 07:21:18 +0000517
518 void determineTokenType(AnnotatedToken &Current) {
519 if (getPrecedence(Current) == prec::Assignment) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000520 Contexts.back().IsExpression = true;
Daniel Jasper01786732013-02-04 07:21:18 +0000521 AnnotatedToken *Previous = Current.Parent;
522 while (Previous != NULL) {
523 if (Previous->Type == TT_BinaryOperator &&
524 (Previous->is(tok::star) || Previous->is(tok::amp))) {
525 Previous->Type = TT_PointerOrReference;
526 }
527 Previous = Previous->Parent;
528 }
529 }
530 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
531 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
532 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
Daniel Jasper4e778092013-02-06 10:05:46 +0000533 Contexts.back().IsExpression = true;
Daniel Jasper01786732013-02-04 07:21:18 +0000534
535 if (Current.Type == TT_Unknown) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000536 if (Contexts.back().LookForFunctionName && Current.is(tok::l_paren)) {
Daniel Jasper01786732013-02-04 07:21:18 +0000537 findFunctionName(&Current);
Daniel Jasper4e778092013-02-06 10:05:46 +0000538 Contexts.back().LookForFunctionName = false;
Daniel Jasper01786732013-02-04 07:21:18 +0000539 } else if (Current.is(tok::star) || Current.is(tok::amp)) {
Daniel Jasper4e778092013-02-06 10:05:46 +0000540 Current.Type =
541 determineStarAmpUsage(Current, Contexts.back().IsExpression);
Daniel Jasper01786732013-02-04 07:21:18 +0000542 } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
543 Current.is(tok::caret)) {
544 Current.Type = determinePlusMinusCaretUsage(Current);
545 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
546 Current.Type = determineIncrementUsage(Current);
547 } else if (Current.is(tok::exclaim)) {
548 Current.Type = TT_UnaryOperator;
549 } else if (isBinaryOperator(Current)) {
550 Current.Type = TT_BinaryOperator;
551 } else if (Current.is(tok::comment)) {
552 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
553 Lex.getLangOpts()));
554 if (StringRef(Data).startswith("//"))
555 Current.Type = TT_LineComment;
556 else
557 Current.Type = TT_BlockComment;
558 } else if (Current.is(tok::r_paren) &&
559 (Current.Parent->Type == TT_PointerOrReference ||
560 Current.Parent->Type == TT_TemplateCloser) &&
561 (Current.Children.empty() ||
562 (Current.Children[0].isNot(tok::equal) &&
563 Current.Children[0].isNot(tok::semi) &&
564 Current.Children[0].isNot(tok::l_brace)))) {
565 // FIXME: We need to get smarter and understand more cases of casts.
566 Current.Type = TT_CastRParen;
567 } else if (Current.is(tok::at) && Current.Children.size()) {
568 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
569 case tok::objc_interface:
570 case tok::objc_implementation:
571 case tok::objc_protocol:
572 Current.Type = TT_ObjCDecl;
573 break;
574 case tok::objc_property:
575 Current.Type = TT_ObjCProperty;
576 break;
577 default:
578 break;
579 }
580 }
581 }
582 }
583
584 /// \brief Starting from \p Current, this searches backwards for an
585 /// identifier which could be the start of a function name and marks it.
586 void findFunctionName(AnnotatedToken *Current) {
587 AnnotatedToken *Parent = Current->Parent;
588 while (Parent != NULL && Parent->Parent != NULL) {
589 if (Parent->is(tok::identifier) &&
590 (Parent->Parent->is(tok::identifier) ||
591 Parent->Parent->Type == TT_PointerOrReference ||
592 Parent->Parent->Type == TT_TemplateCloser)) {
593 Parent->Type = TT_StartOfName;
594 break;
595 }
596 Parent = Parent->Parent;
597 }
598 }
599
600 /// \brief Return the type of the given token assuming it is * or &.
601 TokenType
602 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
603 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
604 if (PrevToken == NULL)
605 return TT_UnaryOperator;
606
607 const AnnotatedToken *NextToken = getNextToken(Tok);
608 if (NextToken == NULL)
609 return TT_Unknown;
610
Daniel Jasper01786732013-02-04 07:21:18 +0000611 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
612 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
613 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
Nico Webere8a97982013-02-06 06:20:11 +0000614 PrevToken->is(tok::equal) || PrevToken->Type == TT_BinaryOperator ||
Daniel Jasper01786732013-02-04 07:21:18 +0000615 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
616 return TT_UnaryOperator;
617
Nico Webere8a97982013-02-06 06:20:11 +0000618 if (NextToken->is(tok::l_square))
619 return TT_PointerOrReference;
620
Daniel Jasper01786732013-02-04 07:21:18 +0000621 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
622 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
Nico Weberee0feec2013-02-05 16:21:00 +0000623 isUnaryOperator(*NextToken) || NextToken->is(tok::l_paren) ||
624 NextToken->is(tok::l_square))
Daniel Jasper01786732013-02-04 07:21:18 +0000625 return TT_BinaryOperator;
626
627 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
628 NextToken->is(tok::greater))
629 return TT_PointerOrReference;
630
631 // It is very unlikely that we are going to find a pointer or reference type
632 // definition on the RHS of an assignment.
633 if (IsExpression)
634 return TT_BinaryOperator;
635
636 return TT_PointerOrReference;
637 }
638
639 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
640 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
641 if (PrevToken == NULL)
642 return TT_UnaryOperator;
643
644 // Use heuristics to recognize unary operators.
645 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
646 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
647 PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
648 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
649 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
650 return TT_UnaryOperator;
651
Nico Weberee0feec2013-02-05 16:21:00 +0000652 // There can't be two consecutive binary operators.
Daniel Jasper01786732013-02-04 07:21:18 +0000653 if (PrevToken->Type == TT_BinaryOperator)
654 return TT_UnaryOperator;
655
656 // Fall back to marking the token as binary operator.
657 return TT_BinaryOperator;
658 }
659
660 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
661 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
662 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
663 if (PrevToken == NULL)
664 return TT_UnaryOperator;
665 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
666 PrevToken->is(tok::identifier))
667 return TT_TrailingUnaryOperator;
668
669 return TT_UnaryOperator;
670 }
Daniel Jasper4e778092013-02-06 10:05:46 +0000671
672 SmallVector<Context, 8> Contexts;
673
674 SourceManager &SourceMgr;
675 Lexer &Lex;
676 AnnotatedLine &Line;
677 AnnotatedToken *CurrentToken;
678 bool KeywordVirtualFound;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000679};
680
681void TokenAnnotator::annotate() {
Daniel Jasper01786732013-02-04 07:21:18 +0000682 AnnotatingParser Parser(SourceMgr, Lex, Line);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000683 Line.Type = Parser.parseLine();
684 if (Line.Type == LT_Invalid)
685 return;
686
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000687 if (Line.First.Type == TT_ObjCMethodSpecifier)
688 Line.Type = LT_ObjCMethodDecl;
689 else if (Line.First.Type == TT_ObjCDecl)
690 Line.Type = LT_ObjCDecl;
691 else if (Line.First.Type == TT_ObjCProperty)
692 Line.Type = LT_ObjCProperty;
693
694 Line.First.SpaceRequiredBefore = true;
695 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
696 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
697
698 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
699 if (!Line.First.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000700 calculateFormattingInformation(Line.First.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000701}
702
Daniel Jasper01786732013-02-04 07:21:18 +0000703void TokenAnnotator::calculateFormattingInformation(AnnotatedToken &Current) {
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000704 Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
705
706 if (Current.FormatTok.MustBreakBefore) {
707 Current.MustBreakBefore = true;
Daniel Jasper2752ff32013-02-04 07:32:14 +0000708 } else if (Current.Type == TT_LineComment) {
709 Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
710 } else if ((Current.Parent->is(tok::comment) &&
711 Current.FormatTok.NewlinesBefore > 0) ||
712 (Current.is(tok::string_literal) &&
713 Current.Parent->is(tok::string_literal))) {
714 Current.MustBreakBefore = true;
Daniel Jasperfa543ac2013-02-04 07:34:48 +0000715 } else if (Current.is(tok::lessless) && !Current.Children.empty() &&
716 Current.Parent->is(tok::string_literal) &&
717 Current.Children[0].is(tok::string_literal)) {
718 Current.MustBreakBefore = true;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000719 } else {
Daniel Jasper2752ff32013-02-04 07:32:14 +0000720 Current.MustBreakBefore = false;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000721 }
722 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
723 if (Current.MustBreakBefore)
724 Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
725 else
726 Current.TotalLength =
727 Current.Parent->TotalLength + Current.FormatTok.TokenLength +
728 (Current.SpaceRequiredBefore ? 1 : 0);
729 // FIXME: Only calculate this if CanBreakBefore is true once static
730 // initializers etc. are sorted out.
Daniel Jasper01786732013-02-04 07:21:18 +0000731 // FIXME: Move magic numbers to a better place.
732 Current.SplitPenalty = 20 * Current.BindingStrength + splitPenalty(Current);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000733 if (!Current.Children.empty())
Daniel Jasper01786732013-02-04 07:21:18 +0000734 calculateFormattingInformation(Current.Children[0]);
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000735}
736
737unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
738 const AnnotatedToken &Left = *Tok.Parent;
739 const AnnotatedToken &Right = Tok;
740
741 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
742 return 50;
743 if (Left.is(tok::equal) && Right.is(tok::l_brace))
744 return 150;
745 if (Left.is(tok::coloncolon))
746 return 500;
747
748 if (Left.Type == TT_RangeBasedForLoopColon)
749 return 5;
750
751 if (Right.is(tok::arrow) || Right.is(tok::period)) {
752 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
753 return 5; // Should be smaller than breaking at a nested comma.
754 return 150;
755 }
756
757 // In for-loops, prefer breaking at ',' and ';'.
758 if (Line.First.is(tok::kw_for) &&
759 (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
760 return 20;
761
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000762 if (Left.is(tok::semi))
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000763 return 0;
Daniel Jasper8159d2f2013-02-04 07:30:30 +0000764 if (Left.is(tok::comma))
765 return 1;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000766
767 // In Objective-C method expressions, prefer breaking before "param:" over
768 // breaking after it.
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000769 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000770 return 0;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000771 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000772 return 20;
773
Daniel Jasper01786732013-02-04 07:21:18 +0000774 if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
775 Left.Type == TT_TemplateOpener)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000776 return 20;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000777
Daniel Jasper01786732013-02-04 07:21:18 +0000778 if (Right.is(tok::lessless))
779 return prec::Shift;
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000780 if (Left.Type == TT_ConditionalExpr)
781 return prec::Assignment;
782 prec::Level Level = getPrecedence(Left);
783
784 if (Level != prec::Unknown)
785 return Level;
786
787 return 3;
788}
789
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000790bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
791 const AnnotatedToken &Right) {
792 if (Right.is(tok::hashhash))
793 return Left.is(tok::hash);
794 if (Left.is(tok::hashhash) || Left.is(tok::hash))
795 return Right.is(tok::hash);
796 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
797 return false;
798 if (Right.is(tok::less) &&
799 (Left.is(tok::kw_template) ||
800 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
801 return true;
802 if (Left.is(tok::arrow) || Right.is(tok::arrow))
803 return false;
804 if (Left.is(tok::exclaim) || Left.is(tok::tilde))
805 return false;
806 if (Left.is(tok::at) &&
807 (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
808 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
809 Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
810 Right.is(tok::kw_true) || Right.is(tok::kw_false)))
811 return false;
812 if (Left.is(tok::coloncolon))
813 return false;
814 if (Right.is(tok::coloncolon))
815 return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
816 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
817 return false;
818 if (Right.is(tok::amp) || Right.is(tok::star))
819 return Left.FormatTok.Tok.isLiteral() ||
820 (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
821 !Style.PointerAndReferenceBindToType);
822 if (Left.is(tok::amp) || Left.is(tok::star))
823 return Right.FormatTok.Tok.isLiteral() ||
824 Style.PointerAndReferenceBindToType;
825 if (Right.is(tok::star) && Left.is(tok::l_paren))
826 return false;
827 if (Left.is(tok::l_square) || Right.is(tok::r_square))
828 return false;
829 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
830 return false;
831 if (Left.is(tok::period) || Right.is(tok::period))
832 return false;
833 if (Left.is(tok::colon))
834 return Left.Type != TT_ObjCMethodExpr;
835 if (Right.is(tok::colon))
836 return Right.Type != TT_ObjCMethodExpr;
837 if (Left.is(tok::l_paren))
838 return false;
839 if (Right.is(tok::l_paren)) {
840 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
841 Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
842 Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
843 Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
844 Left.is(tok::kw_delete);
845 }
846 if (Left.is(tok::at) &&
847 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
848 return false;
849 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
850 return false;
851 return true;
852}
853
854bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
855 if (Line.Type == LT_ObjCMethodDecl) {
856 if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
857 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
858 return true;
859 if (Tok.is(tok::colon))
860 return false;
861 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
862 return true;
863 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
864 // Don't space between ')' and <id>
865 return false;
866 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
867 // Don't space between ':' and '('
868 return false;
869 }
870 if (Line.Type == LT_ObjCProperty &&
871 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
872 return false;
873
874 if (Tok.Parent->is(tok::comma))
875 return true;
876 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
877 return true;
878 if (Tok.Type == TT_OverloadedOperator)
879 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
880 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
881 if (Tok.Parent->Type == TT_OverloadedOperator)
882 return false;
883 if (Tok.is(tok::colon))
884 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
885 Tok.Type != TT_ObjCMethodExpr;
886 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
887 return false;
888 if (Tok.Type == TT_UnaryOperator)
889 return Tok.Parent->isNot(tok::l_paren) &&
890 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
891 (Tok.Parent->isNot(tok::colon) ||
892 Tok.Parent->Type != TT_ObjCMethodExpr);
893 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
894 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
895 TT_TemplateCloser && Style.SplitTemplateClosingGreater;
896 }
897 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
898 return true;
899 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
900 return false;
901 if (Tok.is(tok::less) && Line.First.is(tok::hash))
902 return true;
903 if (Tok.Type == TT_TrailingUnaryOperator)
904 return false;
905 return spaceRequiredBetween(*Tok.Parent, Tok);
906}
907
908bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
909 const AnnotatedToken &Left = *Right.Parent;
910 if (Line.Type == LT_ObjCMethodDecl) {
911 if (Right.is(tok::identifier) && !Right.Children.empty() &&
912 Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
913 return true;
914 if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
915 Left.Parent->is(tok::colon))
916 // Don't break this identifier as ':' or identifier
917 // before it will break.
918 return false;
919 if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
920 // Don't break at ':' if identifier before it can beak.
921 return false;
922 }
923 if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
924 return true;
925 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
926 return false;
927 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
928 return true;
Daniel Jasper63d7ced2013-02-05 10:07:47 +0000929 if (Right.Type == TT_ObjCSelectorName)
Daniel Jasper32d28ee2013-01-29 21:01:14 +0000930 return true;
931 if (Left.ClosesTemplateDeclaration)
932 return true;
933 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
934 return true;
935 if (Left.Type == TT_RangeBasedForLoopColon)
936 return true;
937 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
938 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
939 Left.is(tok::question))
940 return false;
941 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
942 return false;
943
944 if (Right.Type == TT_LineComment)
945 // We rely on MustBreakBefore being set correctly here as we should not
946 // change the "binding" behavior of a comment.
947 return false;
948
949 // Allow breaking after a trailing 'const', e.g. after a method declaration,
950 // unless it is follow by ';', '{' or '='.
951 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
952 Left.Parent->is(tok::r_paren))
953 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
954 Right.isNot(tok::equal);
955
956 // We only break before r_brace if there was a corresponding break before
957 // the l_brace, which is tracked by BreakBeforeClosingBrace.
958 if (Right.is(tok::r_brace))
959 return false;
960
961 if (Right.is(tok::r_paren) || Right.is(tok::greater))
962 return false;
963 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
964 Left.is(tok::comma) || Right.is(tok::lessless) ||
965 Right.is(tok::arrow) || Right.is(tok::period) ||
966 Right.is(tok::colon) || Left.is(tok::coloncolon) ||
967 Left.is(tok::semi) || Left.is(tok::l_brace) ||
968 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
969 Right.is(tok::identifier)) ||
970 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
971 (Left.is(tok::l_square) && !Right.is(tok::r_square));
972}
973
974} // namespace format
975} // namespace clang