blob: 29cbc3624cd940eb83cd8ca77e1df0fe6b0ec534 [file] [log] [blame]
//===--- Format.cpp - Format C++ code -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements functions declared in Format.h. This will be
/// split into separate files as we go.
///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a state
/// where it can be used to format real code.
///
//===----------------------------------------------------------------------===//
18
19#include "clang/Format/Format.h"
Chandler Carruth55fc8732012-12-04 09:13:33 +000020#include "UnwrappedLineParser.h"
Daniel Jasperbac016b2012-12-03 18:12:45 +000021#include "clang/Basic/SourceManager.h"
22#include "clang/Lex/Lexer.h"
23
Daniel Jasper8822d3a2012-12-04 13:02:32 +000024#include <string>
25
Daniel Jasperbac016b2012-12-03 18:12:45 +000026namespace clang {
27namespace format {
28
29// FIXME: Move somewhere sane.
/// \brief Per-token information that is passed to the
/// \c UnwrappedLineFormatter as a vector with one entry per token.
struct TokenAnnotation {
  /// \brief The role assigned to a token. \c TT_Unknown must stay the first
  /// enumerator so that it is the default.
  enum TokenType {
    TT_Unknown,
    TT_TemplateOpener,       // '<' that was matched with a closing '>'.
    TT_TemplateCloser,       // '>' that closes a matched '<'.
    TT_BinaryOperator,
    TT_UnaryOperator,
    TT_OverloadedOperator,   // Token directly following 'operator'.
    TT_PointerOrReference,
    TT_ConditionalExpr,      // ':' that belongs to a preceding '?'.
    TT_CtorInitializerColon, // ':' directly following a closing ')'.
    TT_LineComment,          // Comment starting with "//".
    TT_BlockComment,         // Any other comment.
    TT_ObjCMethodSpecifier   // '+' or '-' at the start of a line.
  };

  /// \brief Creates an annotation of unknown type with no spacing or line
  /// break requirements.
  ///
  /// The annotation code only sets some of the flags on some paths and relies
  /// on the remaining ones being \c false, so make that the guaranteed initial
  /// state instead of depending on how the object was created.
  TokenAnnotation()
      : Type(TT_Unknown), SpaceRequiredBefore(false), CanBreakBefore(false),
        MustBreakBefore(false) {}

  TokenType Type;

  /// \brief Whether a space has to be put in front of the token.
  bool SpaceRequiredBefore;
  /// \brief Whether a line break may be inserted in front of the token.
  bool CanBreakBefore;
  /// \brief Whether a line break has to be inserted in front of the token.
  bool MustBreakBefore;
};
52
53using llvm::MutableArrayRef;
54
55FormatStyle getLLVMStyle() {
56 FormatStyle LLVMStyle;
57 LLVMStyle.ColumnLimit = 80;
58 LLVMStyle.MaxEmptyLinesToKeep = 1;
59 LLVMStyle.PointerAndReferenceBindToType = false;
60 LLVMStyle.AccessModifierOffset = -2;
61 LLVMStyle.SplitTemplateClosingGreater = true;
Alexander Kornienko15757312012-12-06 18:03:27 +000062 LLVMStyle.IndentCaseLabels = false;
Daniel Jasperbac016b2012-12-03 18:12:45 +000063 return LLVMStyle;
64}
65
66FormatStyle getGoogleStyle() {
67 FormatStyle GoogleStyle;
68 GoogleStyle.ColumnLimit = 80;
69 GoogleStyle.MaxEmptyLinesToKeep = 1;
70 GoogleStyle.PointerAndReferenceBindToType = true;
71 GoogleStyle.AccessModifierOffset = -1;
72 GoogleStyle.SplitTemplateClosingGreater = false;
Alexander Kornienko15757312012-12-06 18:03:27 +000073 GoogleStyle.IndentCaseLabels = true;
Daniel Jasperbac016b2012-12-03 18:12:45 +000074 return GoogleStyle;
75}
76
/// \brief Weights used when computing the penalty of a line break.
struct OptimizationParameters {
  /// \brief Factor that is multiplied with the current nesting depth and
  /// charged for every line break.
  unsigned PenaltyIndentLevel;
};
80
81class UnwrappedLineFormatter {
82public:
83 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
84 const UnwrappedLine &Line,
85 const std::vector<TokenAnnotation> &Annotations,
Alexander Kornienkocff563c2012-12-04 17:27:50 +000086 tooling::Replacements &Replaces, bool StructuralError)
Daniel Jasper1321eb52012-12-18 21:05:13 +000087 : Style(Style), SourceMgr(SourceMgr), Line(Line),
88 Annotations(Annotations), Replaces(Replaces),
Alexander Kornienkocff563c2012-12-04 17:27:50 +000089 StructuralError(StructuralError) {
Daniel Jasperbac016b2012-12-03 18:12:45 +000090 Parameters.PenaltyIndentLevel = 5;
91 }
92
93 void format() {
Daniel Jasper3b5943f2012-12-06 09:56:08 +000094 // Format first token and initialize indent.
Alexander Kornienkocff563c2012-12-04 17:27:50 +000095 unsigned Indent = formatFirstToken();
Daniel Jasper3b5943f2012-12-06 09:56:08 +000096
97 // Initialize state dependent on indent.
Daniel Jasperbac016b2012-12-03 18:12:45 +000098 IndentState State;
Daniel Jasper3b5943f2012-12-06 09:56:08 +000099 State.Column = Indent;
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000100 State.ConsumedTokens = 0;
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000101 State.Indent.push_back(Indent + 4);
102 State.LastSpace.push_back(Indent);
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000103 State.FirstLessLess.push_back(0);
104
105 // The first token has already been indented and thus consumed.
106 moveStateToNextToken(State);
Daniel Jasperbac016b2012-12-03 18:12:45 +0000107
Daniel Jasper1321eb52012-12-18 21:05:13 +0000108 // Check whether the UnwrappedLine can be put onto a single line. If so,
109 // this is bound to be the optimal solution (by definition) and we don't
110 // need to analyze the entire solution space.
111 unsigned Columns = State.Column;
112 bool FitsOnALine = true;
113 for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) {
114 Columns += (Annotations[i].SpaceRequiredBefore ? 1 : 0) +
115 Line.Tokens[i].Tok.getLength();
116 // A special case for the colon of a constructor initializer as this only
117 // needs to be put on a new line if the line needs to be split.
118 if (Columns > Style.ColumnLimit ||
119 (Annotations[i].MustBreakBefore &&
120 Annotations[i].Type != TokenAnnotation::TT_CtorInitializerColon)) {
121 FitsOnALine = false;
122 break;
123 }
124 }
125
Daniel Jasperbac016b2012-12-03 18:12:45 +0000126 // Start iterating at 1 as we have correctly formatted of Token #0 above.
127 for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) {
Daniel Jasper1321eb52012-12-18 21:05:13 +0000128 if (FitsOnALine) {
129 addTokenToState(false, false, State);
130 } else {
131 unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
132 unsigned Break = calcPenalty(State, true, NoBreak);
133 addTokenToState(Break < NoBreak, false, State);
134 }
Daniel Jasperbac016b2012-12-03 18:12:45 +0000135 }
136 }
137
138private:
139 /// \brief The current state when indenting a unwrapped line.
140 ///
141 /// As the indenting tries different combinations this is copied by value.
142 struct IndentState {
143 /// \brief The number of used columns in the current line.
144 unsigned Column;
145
146 /// \brief The number of tokens already consumed.
147 unsigned ConsumedTokens;
148
149 /// \brief The position to which a specific parenthesis level needs to be
150 /// indented.
151 std::vector<unsigned> Indent;
152
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000153 /// \brief The position of the last space on each level.
154 ///
155 /// Used e.g. to break like:
156 /// functionCall(Parameter, otherCall(
157 /// OtherParameter));
Daniel Jasperbac016b2012-12-03 18:12:45 +0000158 std::vector<unsigned> LastSpace;
159
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000160 /// \brief The position the first "<<" operator encountered on each level.
161 ///
162 /// Used to align "<<" operators. 0 if no such operator has been encountered
163 /// on a level.
164 std::vector<unsigned> FirstLessLess;
165
Daniel Jasperbac016b2012-12-03 18:12:45 +0000166 /// \brief Comparison operator to be able to used \c IndentState in \c map.
167 bool operator<(const IndentState &Other) const {
168 if (Other.ConsumedTokens != ConsumedTokens)
169 return Other.ConsumedTokens > ConsumedTokens;
170 if (Other.Column != Column)
171 return Other.Column > Column;
172 if (Other.Indent.size() != Indent.size())
173 return Other.Indent.size() > Indent.size();
174 for (int i = 0, e = Indent.size(); i != e; ++i) {
175 if (Other.Indent[i] != Indent[i])
176 return Other.Indent[i] > Indent[i];
177 }
178 if (Other.LastSpace.size() != LastSpace.size())
179 return Other.LastSpace.size() > LastSpace.size();
180 for (int i = 0, e = LastSpace.size(); i != e; ++i) {
181 if (Other.LastSpace[i] != LastSpace[i])
182 return Other.LastSpace[i] > LastSpace[i];
183 }
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000184 if (Other.FirstLessLess.size() != FirstLessLess.size())
185 return Other.FirstLessLess.size() > FirstLessLess.size();
186 for (int i = 0, e = FirstLessLess.size(); i != e; ++i) {
187 if (Other.FirstLessLess[i] != FirstLessLess[i])
188 return Other.FirstLessLess[i] > FirstLessLess[i];
189 }
Daniel Jasperbac016b2012-12-03 18:12:45 +0000190 return false;
191 }
192 };
193
Daniel Jasper20409152012-12-04 14:54:30 +0000194 /// \brief Appends the next token to \p State and updates information
195 /// necessary for indentation.
196 ///
197 /// Puts the token on the current line if \p Newline is \c true and adds a
198 /// line break and necessary indentation otherwise.
199 ///
200 /// If \p DryRun is \c false, also creates and stores the required
201 /// \c Replacement.
202 void addTokenToState(bool Newline, bool DryRun, IndentState &State) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000203 unsigned Index = State.ConsumedTokens;
204 const FormatToken &Current = Line.Tokens[Index];
205 const FormatToken &Previous = Line.Tokens[Index - 1];
Daniel Jasper20409152012-12-04 14:54:30 +0000206 unsigned ParenLevel = State.Indent.size() - 1;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000207
208 if (Newline) {
209 if (Current.Tok.is(tok::string_literal) &&
210 Previous.Tok.is(tok::string_literal))
211 State.Column = State.Column - Previous.Tok.getLength();
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000212 else if (Current.Tok.is(tok::lessless) &&
213 State.FirstLessLess[ParenLevel] != 0)
214 State.Column = State.FirstLessLess[ParenLevel];
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000215 else if (ParenLevel != 0 &&
216 (Previous.Tok.is(tok::equal) || Current.Tok.is(tok::arrow) ||
217 Current.Tok.is(tok::period)))
Daniel Jasper20409152012-12-04 14:54:30 +0000218 // Indent and extra 4 spaces after '=' as it continues an expression.
219 // Don't do that on the top level, as we already indent 4 there.
Daniel Jasperbac016b2012-12-03 18:12:45 +0000220 State.Column = State.Indent[ParenLevel] + 4;
Daniel Jaspera88bb452012-12-04 10:50:12 +0000221 else
Daniel Jasperbac016b2012-12-03 18:12:45 +0000222 State.Column = State.Indent[ParenLevel];
Daniel Jasper20409152012-12-04 14:54:30 +0000223
Daniel Jasperbac016b2012-12-03 18:12:45 +0000224 if (!DryRun)
225 replaceWhitespace(Current, 1, State.Column);
226
Daniel Jaspera88bb452012-12-04 10:50:12 +0000227 State.LastSpace[ParenLevel] = State.Indent[ParenLevel];
Daniel Jasperbac016b2012-12-03 18:12:45 +0000228 if (Current.Tok.is(tok::colon) &&
Fariborz Jahanian154120c2012-12-20 19:54:13 +0000229 Annotations[Index].Type != TokenAnnotation::TT_ConditionalExpr &&
230 Annotations[0].Type != TokenAnnotation::TT_ObjCMethodSpecifier)
Daniel Jasperbac016b2012-12-03 18:12:45 +0000231 State.Indent[ParenLevel] += 2;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000232 } else {
233 unsigned Spaces = Annotations[Index].SpaceRequiredBefore ? 1 : 0;
234 if (Annotations[Index].Type == TokenAnnotation::TT_LineComment)
235 Spaces = 2;
Daniel Jasper20409152012-12-04 14:54:30 +0000236
Daniel Jasperbac016b2012-12-03 18:12:45 +0000237 if (!DryRun)
238 replaceWhitespace(Current, 0, Spaces);
Daniel Jasper20409152012-12-04 14:54:30 +0000239
240 if (Previous.Tok.is(tok::l_paren) ||
241 Annotations[Index - 1].Type == TokenAnnotation::TT_TemplateOpener)
Daniel Jasperbac016b2012-12-03 18:12:45 +0000242 State.Indent[ParenLevel] = State.Column;
Daniel Jasper1321eb52012-12-18 21:05:13 +0000243
Daniel Jasperbac016b2012-12-03 18:12:45 +0000244 // Top-level spaces are exempt as that mostly leads to better results.
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000245 State.Column += Spaces;
Daniel Jaspera88bb452012-12-04 10:50:12 +0000246 if (Spaces > 0 && ParenLevel != 0)
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000247 State.LastSpace[ParenLevel] = State.Column;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000248 }
Daniel Jasper20409152012-12-04 14:54:30 +0000249 moveStateToNextToken(State);
250 }
Daniel Jasperbac016b2012-12-03 18:12:45 +0000251
Daniel Jasper20409152012-12-04 14:54:30 +0000252 /// \brief Mark the next token as consumed in \p State and modify its stacks
253 /// accordingly.
254 void moveStateToNextToken(IndentState &State) {
255 unsigned Index = State.ConsumedTokens;
256 const FormatToken &Current = Line.Tokens[Index];
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000257 unsigned ParenLevel = State.Indent.size() - 1;
258
259 if (Current.Tok.is(tok::lessless) && State.FirstLessLess[ParenLevel] == 0)
260 State.FirstLessLess[ParenLevel] = State.Column;
261
262 State.Column += Current.Tok.getLength();
Daniel Jasper20409152012-12-04 14:54:30 +0000263
264 // If we encounter an opening (, [ or <, we add a level to our stacks to
265 // prepare for the following tokens.
266 if (Current.Tok.is(tok::l_paren) || Current.Tok.is(tok::l_square) ||
267 Annotations[Index].Type == TokenAnnotation::TT_TemplateOpener) {
268 State.Indent.push_back(4 + State.LastSpace.back());
269 State.LastSpace.push_back(State.LastSpace.back());
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000270 State.FirstLessLess.push_back(0);
Daniel Jasper20409152012-12-04 14:54:30 +0000271 }
272
273 // If we encounter a closing ), ] or >, we can remove a level from our
274 // stacks.
Daniel Jaspera88bb452012-12-04 10:50:12 +0000275 if (Current.Tok.is(tok::r_paren) || Current.Tok.is(tok::r_square) ||
276 Annotations[Index].Type == TokenAnnotation::TT_TemplateCloser) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000277 State.Indent.pop_back();
278 State.LastSpace.pop_back();
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000279 State.FirstLessLess.pop_back();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000280 }
281
282 ++State.ConsumedTokens;
283 }
284
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000285 /// \brief Calculate the panelty for splitting after the token at \p Index.
286 unsigned splitPenalty(unsigned Index) {
287 assert(Index < Line.Tokens.size() &&
288 "Tried to calculate penalty for splitting after the last token");
289 const FormatToken &Left = Line.Tokens[Index];
290 const FormatToken &Right = Line.Tokens[Index + 1];
Daniel Jasper1321eb52012-12-18 21:05:13 +0000291 if (Left.Tok.is(tok::semi) || Left.Tok.is(tok::comma))
Daniel Jasperbac016b2012-12-03 18:12:45 +0000292 return 0;
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000293 if (Left.Tok.is(tok::equal) || Left.Tok.is(tok::l_paren) ||
294 Left.Tok.is(tok::pipepipe) || Left.Tok.is(tok::ampamp))
Daniel Jasperbac016b2012-12-03 18:12:45 +0000295 return 2;
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000296
297 if (Right.Tok.is(tok::arrow) || Right.Tok.is(tok::period))
298 return 200;
299
Daniel Jasperbac016b2012-12-03 18:12:45 +0000300 return 3;
301 }
302
303 /// \brief Calculate the number of lines needed to format the remaining part
304 /// of the unwrapped line.
305 ///
306 /// Assumes the formatting so far has led to
307 /// the \c IndentState \p State. If \p NewLine is set, a new line will be
308 /// added after the previous token.
309 ///
310 /// \param StopAt is used for optimization. If we can determine that we'll
311 /// definitely need at least \p StopAt additional lines, we already know of a
312 /// better solution.
313 unsigned calcPenalty(IndentState State, bool NewLine, unsigned StopAt) {
314 // We are at the end of the unwrapped line, so we don't need any more lines.
315 if (State.ConsumedTokens >= Line.Tokens.size())
316 return 0;
317
318 if (!NewLine && Annotations[State.ConsumedTokens].MustBreakBefore)
319 return UINT_MAX;
320 if (NewLine && !Annotations[State.ConsumedTokens].CanBreakBefore)
321 return UINT_MAX;
322
Daniel Jasper33182dd2012-12-05 14:57:28 +0000323 unsigned CurrentPenalty = 0;
324 if (NewLine) {
325 CurrentPenalty += Parameters.PenaltyIndentLevel * State.Indent.size() +
Daniel Jasper1321eb52012-12-18 21:05:13 +0000326 splitPenalty(State.ConsumedTokens - 1);
Daniel Jasper33182dd2012-12-05 14:57:28 +0000327 }
328
Daniel Jasper20409152012-12-04 14:54:30 +0000329 addTokenToState(NewLine, true, State);
Daniel Jasperbac016b2012-12-03 18:12:45 +0000330
331 // Exceeding column limit is bad.
332 if (State.Column > Style.ColumnLimit)
333 return UINT_MAX;
334
Daniel Jasperbac016b2012-12-03 18:12:45 +0000335 if (StopAt <= CurrentPenalty)
336 return UINT_MAX;
337 StopAt -= CurrentPenalty;
338
Daniel Jasperbac016b2012-12-03 18:12:45 +0000339 StateMap::iterator I = Memory.find(State);
Daniel Jasper33182dd2012-12-05 14:57:28 +0000340 if (I != Memory.end()) {
341 // If this state has already been examined, we can safely return the
342 // previous result if we
343 // - have not hit the optimatization (and thus returned UINT_MAX) OR
344 // - are now computing for a smaller or equal StopAt.
345 unsigned SavedResult = I->second.first;
346 unsigned SavedStopAt = I->second.second;
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000347 if (SavedResult != UINT_MAX)
348 return SavedResult + CurrentPenalty;
349 else if (StopAt <= SavedStopAt)
350 return UINT_MAX;
Daniel Jasper33182dd2012-12-05 14:57:28 +0000351 }
Daniel Jasperbac016b2012-12-03 18:12:45 +0000352
353 unsigned NoBreak = calcPenalty(State, false, StopAt);
354 unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak));
355 unsigned Result = std::min(NoBreak, WithBreak);
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000356
357 // We have to store 'Result' without adding 'CurrentPenalty' as the latter
358 // can depend on 'NewLine'.
Daniel Jasper33182dd2012-12-05 14:57:28 +0000359 Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000360
361 return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000362 }
363
364 /// \brief Replaces the whitespace in front of \p Tok. Only call once for
365 /// each \c FormatToken.
366 void replaceWhitespace(const FormatToken &Tok, unsigned NewLines,
367 unsigned Spaces) {
368 Replaces.insert(tooling::Replacement(
369 SourceMgr, Tok.WhiteSpaceStart, Tok.WhiteSpaceLength,
370 std::string(NewLines, '\n') + std::string(Spaces, ' ')));
371 }
372
373 /// \brief Add a new line and the required indent before the first Token
Alexander Kornienko720ffb62012-12-05 13:56:52 +0000374 /// of the \c UnwrappedLine if there was no structural parsing error.
375 /// Returns the indent level of the \c UnwrappedLine.
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000376 unsigned formatFirstToken() {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000377 const FormatToken &Token = Line.Tokens[0];
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000378 if (!Token.WhiteSpaceStart.isValid() || StructuralError)
379 return SourceMgr.getSpellingColumnNumber(Token.Tok.getLocation()) - 1;
380
381 unsigned Newlines =
382 std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
383 unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
384 if (Newlines == 0 && Offset != 0)
385 Newlines = 1;
386 unsigned Indent = Line.Level * 2;
Alexander Kornienko56e49c52012-12-10 16:34:48 +0000387 if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) ||
388 Token.Tok.is(tok::kw_private)) &&
389 static_cast<int>(Indent) + Style.AccessModifierOffset >= 0)
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000390 Indent += Style.AccessModifierOffset;
391 replaceWhitespace(Token, Newlines, Indent);
392 return Indent;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000393 }
394
395 FormatStyle Style;
396 SourceManager &SourceMgr;
397 const UnwrappedLine &Line;
398 const std::vector<TokenAnnotation> &Annotations;
399 tooling::Replacements &Replaces;
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000400 bool StructuralError;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000401
Daniel Jasper33182dd2012-12-05 14:57:28 +0000402 // A map from an indent state to a pair (Result, Used-StopAt).
403 typedef std::map<IndentState, std::pair<unsigned, unsigned> > StateMap;
404 StateMap Memory;
405
Daniel Jasperbac016b2012-12-03 18:12:45 +0000406 OptimizationParameters Parameters;
407};
408
409/// \brief Determines extra information about the tokens comprising an
410/// \c UnwrappedLine.
411class TokenAnnotator {
412public:
413 TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style,
414 SourceManager &SourceMgr)
Daniel Jasper1321eb52012-12-18 21:05:13 +0000415 : Line(Line), Style(Style), SourceMgr(SourceMgr) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000416 }
417
418 /// \brief A parser that gathers additional information about tokens.
419 ///
420 /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
421 /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
422 /// into template parameter lists.
423 class AnnotatingParser {
424 public:
Manuel Klimek0be4b362012-12-03 20:55:42 +0000425 AnnotatingParser(const SmallVector<FormatToken, 16> &Tokens,
Daniel Jasperbac016b2012-12-03 18:12:45 +0000426 std::vector<TokenAnnotation> &Annotations)
Daniel Jasper1321eb52012-12-18 21:05:13 +0000427 : Tokens(Tokens), Annotations(Annotations), Index(0) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000428 }
429
Daniel Jasper20409152012-12-04 14:54:30 +0000430 bool parseAngle() {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000431 while (Index < Tokens.size()) {
432 if (Tokens[Index].Tok.is(tok::greater)) {
Daniel Jaspera88bb452012-12-04 10:50:12 +0000433 Annotations[Index].Type = TokenAnnotation::TT_TemplateCloser;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000434 next();
435 return true;
436 }
437 if (Tokens[Index].Tok.is(tok::r_paren) ||
438 Tokens[Index].Tok.is(tok::r_square))
439 return false;
440 if (Tokens[Index].Tok.is(tok::pipepipe) ||
441 Tokens[Index].Tok.is(tok::ampamp) ||
442 Tokens[Index].Tok.is(tok::question) ||
443 Tokens[Index].Tok.is(tok::colon))
444 return false;
Daniel Jasper20409152012-12-04 14:54:30 +0000445 consumeToken();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000446 }
447 return false;
448 }
449
Daniel Jasper20409152012-12-04 14:54:30 +0000450 bool parseParens() {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000451 while (Index < Tokens.size()) {
452 if (Tokens[Index].Tok.is(tok::r_paren)) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000453 next();
454 return true;
455 }
456 if (Tokens[Index].Tok.is(tok::r_square))
457 return false;
Daniel Jasper20409152012-12-04 14:54:30 +0000458 consumeToken();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000459 }
460 return false;
461 }
462
Daniel Jasper20409152012-12-04 14:54:30 +0000463 bool parseSquare() {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000464 while (Index < Tokens.size()) {
465 if (Tokens[Index].Tok.is(tok::r_square)) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000466 next();
467 return true;
468 }
469 if (Tokens[Index].Tok.is(tok::r_paren))
470 return false;
Daniel Jasper20409152012-12-04 14:54:30 +0000471 consumeToken();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000472 }
473 return false;
474 }
475
Daniel Jasper20409152012-12-04 14:54:30 +0000476 bool parseConditional() {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000477 while (Index < Tokens.size()) {
478 if (Tokens[Index].Tok.is(tok::colon)) {
479 Annotations[Index].Type = TokenAnnotation::TT_ConditionalExpr;
480 next();
481 return true;
482 }
Daniel Jasper20409152012-12-04 14:54:30 +0000483 consumeToken();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000484 }
485 return false;
486 }
487
Daniel Jasper20409152012-12-04 14:54:30 +0000488 void consumeToken() {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000489 unsigned CurrentIndex = Index;
490 next();
491 switch (Tokens[CurrentIndex].Tok.getKind()) {
492 case tok::l_paren:
Daniel Jasper20409152012-12-04 14:54:30 +0000493 parseParens();
Daniel Jasper1321eb52012-12-18 21:05:13 +0000494 if (Index < Tokens.size() && Tokens[Index].Tok.is(tok::colon)) {
495 Annotations[Index].Type = TokenAnnotation::TT_CtorInitializerColon;
496 next();
497 }
Daniel Jasperbac016b2012-12-03 18:12:45 +0000498 break;
499 case tok::l_square:
Daniel Jasper20409152012-12-04 14:54:30 +0000500 parseSquare();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000501 break;
502 case tok::less:
Daniel Jasper20409152012-12-04 14:54:30 +0000503 if (parseAngle())
Daniel Jasperbac016b2012-12-03 18:12:45 +0000504 Annotations[CurrentIndex].Type = TokenAnnotation::TT_TemplateOpener;
505 else {
506 Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator;
507 Index = CurrentIndex + 1;
508 }
509 break;
510 case tok::greater:
511 Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator;
512 break;
513 case tok::kw_operator:
514 if (!Tokens[Index].Tok.is(tok::l_paren))
515 Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator;
516 next();
517 break;
518 case tok::question:
Daniel Jasper20409152012-12-04 14:54:30 +0000519 parseConditional();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000520 break;
521 default:
522 break;
523 }
524 }
525
526 void parseLine() {
527 while (Index < Tokens.size()) {
Daniel Jasper20409152012-12-04 14:54:30 +0000528 consumeToken();
Daniel Jasperbac016b2012-12-03 18:12:45 +0000529 }
530 }
531
532 void next() {
533 ++Index;
534 }
535
536 private:
Daniel Jasperbac016b2012-12-03 18:12:45 +0000537 const SmallVector<FormatToken, 16> &Tokens;
538 std::vector<TokenAnnotation> &Annotations;
539 unsigned Index;
540 };
541
542 void annotate() {
543 Annotations.clear();
544 for (int i = 0, e = Line.Tokens.size(); i != e; ++i) {
545 Annotations.push_back(TokenAnnotation());
546 }
547
Manuel Klimek0be4b362012-12-03 20:55:42 +0000548 AnnotatingParser Parser(Line.Tokens, Annotations);
Daniel Jasperbac016b2012-12-03 18:12:45 +0000549 Parser.parseLine();
550
551 determineTokenTypes();
Fariborz Jahanian154120c2012-12-20 19:54:13 +0000552 bool IsObjCMethodDecl =
553 (Line.Tokens.size() > 0 &&
554 (Annotations[0].Type == TokenAnnotation::TT_ObjCMethodSpecifier));
Daniel Jasperbac016b2012-12-03 18:12:45 +0000555 for (int i = 1, e = Line.Tokens.size(); i != e; ++i) {
556 TokenAnnotation &Annotation = Annotations[i];
557
558 Annotation.CanBreakBefore =
559 canBreakBetween(Line.Tokens[i - 1], Line.Tokens[i]);
560
Daniel Jasper1321eb52012-12-18 21:05:13 +0000561 if (Annotation.Type == TokenAnnotation::TT_CtorInitializerColon) {
562 Annotation.MustBreakBefore = true;
563 Annotation.SpaceRequiredBefore = true;
Fariborz Jahanian154120c2012-12-20 19:54:13 +0000564 } else if (IsObjCMethodDecl &&
565 Line.Tokens[i].Tok.is(tok::identifier) &&
566 (i != e-1) && Line.Tokens[i+1].Tok.is(tok::colon) &&
567 Line.Tokens[i-1].Tok.is(tok::identifier)) {
568 Annotation.CanBreakBefore = true;
569 Annotation.SpaceRequiredBefore = true;
570 } else if (IsObjCMethodDecl &&
571 Line.Tokens[i].Tok.is(tok::identifier) &&
572 Line.Tokens[i-1].Tok.is(tok::l_paren) &&
573 Line.Tokens[i-2].Tok.is(tok::colon)) {
574 // Don't break this identifier as ':' or identifier
575 // before it will break.
576 Annotation.CanBreakBefore = false;
577 } else if (Line.Tokens[i].Tok.is(tok::at) &&
578 Line.Tokens[i-2].Tok.is(tok::at)) {
579 // Don't put two objc's '@' on the same line. This could happen,
580 // as in, @optinal @property ...
581 Annotation.MustBreakBefore = true;
Daniel Jasper1321eb52012-12-18 21:05:13 +0000582 } else if (Line.Tokens[i].Tok.is(tok::colon)) {
Daniel Jasperdfbb3192012-12-05 16:24:48 +0000583 Annotation.SpaceRequiredBefore =
Fariborz Jahanian154120c2012-12-20 19:54:13 +0000584 Line.Tokens[0].Tok.isNot(tok::kw_case) && !IsObjCMethodDecl &&
585 (i != e - 1);
586 // Don't break at ':' if identifier before it can beak.
587 if (IsObjCMethodDecl &&
588 Line.Tokens[i-1].Tok.is(tok::identifier) &&
589 Annotations[i-1].CanBreakBefore)
590 Annotation.CanBreakBefore = false;
591 } else if (Annotations[i - 1].Type ==
592 TokenAnnotation::TT_ObjCMethodSpecifier)
593 Annotation.SpaceRequiredBefore = true;
594 else if (Annotations[i - 1].Type == TokenAnnotation::TT_UnaryOperator) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000595 Annotation.SpaceRequiredBefore = false;
596 } else if (Annotation.Type == TokenAnnotation::TT_UnaryOperator) {
597 Annotation.SpaceRequiredBefore =
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000598 Line.Tokens[i - 1].Tok.isNot(tok::l_paren) &&
599 Line.Tokens[i - 1].Tok.isNot(tok::l_square);
Daniel Jasperbac016b2012-12-03 18:12:45 +0000600 } else if (Line.Tokens[i - 1].Tok.is(tok::greater) &&
601 Line.Tokens[i].Tok.is(tok::greater)) {
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000602 if (Annotation.Type == TokenAnnotation::TT_TemplateCloser &&
Daniel Jasper20409152012-12-04 14:54:30 +0000603 Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser)
Daniel Jaspera88bb452012-12-04 10:50:12 +0000604 Annotation.SpaceRequiredBefore = Style.SplitTemplateClosingGreater;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000605 else
606 Annotation.SpaceRequiredBefore = false;
607 } else if (
608 Annotation.Type == TokenAnnotation::TT_BinaryOperator ||
609 Annotations[i - 1].Type == TokenAnnotation::TT_BinaryOperator) {
610 Annotation.SpaceRequiredBefore = true;
611 } else if (
Daniel Jaspera88bb452012-12-04 10:50:12 +0000612 Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser &&
Daniel Jasperbac016b2012-12-03 18:12:45 +0000613 Line.Tokens[i].Tok.is(tok::l_paren)) {
614 Annotation.SpaceRequiredBefore = false;
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000615 } else if (Line.Tokens[i].Tok.is(tok::less) &&
616 Line.Tokens[0].Tok.is(tok::hash)) {
617 Annotation.SpaceRequiredBefore = true;
Fariborz Jahanian154120c2012-12-20 19:54:13 +0000618 } else if (IsObjCMethodDecl &&
619 Line.Tokens[i - 1].Tok.is(tok::r_paren) &&
620 Line.Tokens[i].Tok.is(tok::identifier))
621 // Don't space between ')' and <id>
622 Annotation.SpaceRequiredBefore = false;
623 else if (IsObjCMethodDecl &&
624 Line.Tokens[i - 1].Tok.is(tok::colon) &&
625 Line.Tokens[i].Tok.is(tok::l_paren))
626 // Don't space between ':' and '('
627 Annotation.SpaceRequiredBefore = false;
628 else {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000629 Annotation.SpaceRequiredBefore =
630 spaceRequiredBetween(Line.Tokens[i - 1].Tok, Line.Tokens[i].Tok);
631 }
632
633 if (Annotations[i - 1].Type == TokenAnnotation::TT_LineComment ||
634 (Line.Tokens[i].Tok.is(tok::string_literal) &&
635 Line.Tokens[i - 1].Tok.is(tok::string_literal))) {
636 Annotation.MustBreakBefore = true;
637 }
638
639 if (Annotation.MustBreakBefore)
640 Annotation.CanBreakBefore = true;
641 }
642 }
643
644 const std::vector<TokenAnnotation> &getAnnotations() {
645 return Annotations;
646 }
647
648private:
649 void determineTokenTypes() {
Daniel Jasper33182dd2012-12-05 14:57:28 +0000650 bool AssignmentEncountered = false;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000651 for (int i = 0, e = Line.Tokens.size(); i != e; ++i) {
652 TokenAnnotation &Annotation = Annotations[i];
653 const FormatToken &Tok = Line.Tokens[i];
654
Daniel Jasper33182dd2012-12-05 14:57:28 +0000655 if (Tok.Tok.is(tok::equal) || Tok.Tok.is(tok::plusequal) ||
656 Tok.Tok.is(tok::minusequal) || Tok.Tok.is(tok::starequal) ||
657 Tok.Tok.is(tok::slashequal))
658 AssignmentEncountered = true;
Daniel Jasper112fb272012-12-05 07:51:39 +0000659
Daniel Jasperbac016b2012-12-03 18:12:45 +0000660 if (Tok.Tok.is(tok::star) || Tok.Tok.is(tok::amp))
Daniel Jasper33182dd2012-12-05 14:57:28 +0000661 Annotation.Type = determineStarAmpUsage(i, AssignmentEncountered);
Fariborz Jahanian154120c2012-12-20 19:54:13 +0000662 else if ((Tok.Tok.is(tok::minus) || Tok.Tok.is(tok::plus)) &&
663 Tok.Tok.isAtStartOfLine())
664 Annotation.Type = TokenAnnotation::TT_ObjCMethodSpecifier;
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000665 else if (isUnaryOperator(i))
Daniel Jasperbac016b2012-12-03 18:12:45 +0000666 Annotation.Type = TokenAnnotation::TT_UnaryOperator;
667 else if (isBinaryOperator(Line.Tokens[i]))
668 Annotation.Type = TokenAnnotation::TT_BinaryOperator;
669 else if (Tok.Tok.is(tok::comment)) {
670 StringRef Data(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
671 Tok.Tok.getLength());
672 if (Data.startswith("//"))
673 Annotation.Type = TokenAnnotation::TT_LineComment;
674 else
675 Annotation.Type = TokenAnnotation::TT_BlockComment;
676 }
677 }
678 }
679
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000680 bool isUnaryOperator(unsigned Index) {
681 const Token &Tok = Line.Tokens[Index].Tok;
Daniel Jasperd56a7372012-12-06 13:16:39 +0000682
683 // '++', '--' and '!' are always unary operators.
684 if (Tok.is(tok::minusminus) || Tok.is(tok::plusplus) ||
685 Tok.is(tok::exclaim))
686 return true;
687
688 // The other possible unary operators are '+' and '-' as we
689 // determine the usage of '*' and '&' in determineStarAmpUsage().
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000690 if (Tok.isNot(tok::minus) && Tok.isNot(tok::plus))
691 return false;
Daniel Jasperd56a7372012-12-06 13:16:39 +0000692
693 // Use heuristics to recognize unary operators.
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000694 const Token &PreviousTok = Line.Tokens[Index - 1].Tok;
695 if (PreviousTok.is(tok::equal) || PreviousTok.is(tok::l_paren) ||
696 PreviousTok.is(tok::comma) || PreviousTok.is(tok::l_square))
697 return true;
Daniel Jasperd56a7372012-12-06 13:16:39 +0000698
699 // Fall back to marking the token as binary operator.
Daniel Jasper8822d3a2012-12-04 13:02:32 +0000700 return Annotations[Index - 1].Type == TokenAnnotation::TT_BinaryOperator;
701 }
702
Daniel Jasperbac016b2012-12-03 18:12:45 +0000703 bool isBinaryOperator(const FormatToken &Tok) {
704 switch (Tok.Tok.getKind()) {
705 case tok::equal:
706 case tok::equalequal:
Daniel Jasper112fb272012-12-05 07:51:39 +0000707 case tok::exclaimequal:
Daniel Jasperbac016b2012-12-03 18:12:45 +0000708 case tok::star:
709 //case tok::amp:
710 case tok::plus:
711 case tok::slash:
712 case tok::minus:
713 case tok::ampamp:
714 case tok::pipe:
715 case tok::pipepipe:
716 case tok::percent:
717 return true;
718 default:
719 return false;
720 }
721 }
722
Daniel Jasper112fb272012-12-05 07:51:39 +0000723 TokenAnnotation::TokenType determineStarAmpUsage(unsigned Index,
Daniel Jasper33182dd2012-12-05 14:57:28 +0000724 bool AssignmentEncountered) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000725 if (Index == Annotations.size())
726 return TokenAnnotation::TT_Unknown;
727
728 if (Index == 0 || Line.Tokens[Index - 1].Tok.is(tok::l_paren) ||
729 Line.Tokens[Index - 1].Tok.is(tok::comma) ||
730 Annotations[Index - 1].Type == TokenAnnotation::TT_BinaryOperator)
731 return TokenAnnotation::TT_UnaryOperator;
732
733 if (Line.Tokens[Index - 1].Tok.isLiteral() ||
734 Line.Tokens[Index + 1].Tok.isLiteral())
735 return TokenAnnotation::TT_BinaryOperator;
736
Daniel Jasper112fb272012-12-05 07:51:39 +0000737 // It is very unlikely that we are going to find a pointer or reference type
738 // definition on the RHS of an assignment.
Daniel Jasper33182dd2012-12-05 14:57:28 +0000739 if (AssignmentEncountered)
Daniel Jasper112fb272012-12-05 07:51:39 +0000740 return TokenAnnotation::TT_BinaryOperator;
741
Daniel Jasperbac016b2012-12-03 18:12:45 +0000742 return TokenAnnotation::TT_PointerOrReference;
743 }
744
745 bool isIfForOrWhile(Token Tok) {
746 return Tok.is(tok::kw_if) || Tok.is(tok::kw_for) || Tok.is(tok::kw_while);
747 }
748
749 bool spaceRequiredBetween(Token Left, Token Right) {
Daniel Jasper8b39c662012-12-10 18:59:13 +0000750 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
751 return false;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000752 if (Left.is(tok::kw_template) && Right.is(tok::less))
753 return true;
754 if (Left.is(tok::arrow) || Right.is(tok::arrow))
755 return false;
756 if (Left.is(tok::exclaim) || Left.is(tok::tilde))
757 return false;
Fariborz Jahanian154120c2012-12-20 19:54:13 +0000758 if (Left.is(tok::at) && Right.is(tok::identifier))
759 return false;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000760 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
761 return false;
Daniel Jasperc74e2792012-12-07 09:52:15 +0000762 if (Right.is(tok::amp) || Right.is(tok::star))
763 return Left.isLiteral() ||
764 (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
765 !Style.PointerAndReferenceBindToType);
Daniel Jasperbac016b2012-12-03 18:12:45 +0000766 if (Left.is(tok::amp) || Left.is(tok::star))
767 return Right.isLiteral() || Style.PointerAndReferenceBindToType;
768 if (Right.is(tok::star) && Left.is(tok::l_paren))
769 return false;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000770 if (Left.is(tok::l_square) || Right.is(tok::l_square) ||
771 Right.is(tok::r_square))
772 return false;
Daniel Jasperc74e2792012-12-07 09:52:15 +0000773 if (Left.is(tok::coloncolon) ||
774 (Right.is(tok::coloncolon) &&
775 (Left.is(tok::identifier) || Left.is(tok::greater))))
Daniel Jasperbac016b2012-12-03 18:12:45 +0000776 return false;
777 if (Left.is(tok::period) || Right.is(tok::period))
778 return false;
779 if (Left.is(tok::colon) || Right.is(tok::colon))
780 return true;
781 if ((Left.is(tok::plusplus) && Right.isAnyIdentifier()) ||
782 (Left.isAnyIdentifier() && Right.is(tok::plusplus)) ||
783 (Left.is(tok::minusminus) && Right.isAnyIdentifier()) ||
784 (Left.isAnyIdentifier() && Right.is(tok::minusminus)))
785 return false;
786 if (Left.is(tok::l_paren))
787 return false;
788 if (Left.is(tok::hash))
789 return false;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000790 if (Right.is(tok::l_paren)) {
791 return !Left.isAnyIdentifier() || isIfForOrWhile(Left);
792 }
793 return true;
794 }
795
796 bool canBreakBetween(const FormatToken &Left, const FormatToken &Right) {
Daniel Jasper05b1ac82012-12-17 11:29:41 +0000797 if (Right.Tok.is(tok::r_paren) || Right.Tok.is(tok::l_brace) ||
798 Right.Tok.is(tok::comment) || Right.Tok.is(tok::greater))
Daniel Jasperbac016b2012-12-03 18:12:45 +0000799 return false;
Daniel Jasper9a0b4942012-12-17 14:34:14 +0000800 if (isBinaryOperator(Left) || Right.Tok.is(tok::lessless) ||
801 Right.Tok.is(tok::arrow) || Right.Tok.is(tok::period))
Daniel Jasper3b5943f2012-12-06 09:56:08 +0000802 return true;
Daniel Jasper05b1ac82012-12-17 11:29:41 +0000803 return Right.Tok.is(tok::colon) || Left.Tok.is(tok::comma) || Left.Tok.is(
804 tok::semi) || Left.Tok.is(tok::equal) || Left.Tok.is(tok::ampamp) ||
805 Left.Tok.is(tok::pipepipe) || Left.Tok.is(tok::l_brace) ||
Daniel Jasperbac016b2012-12-03 18:12:45 +0000806 (Left.Tok.is(tok::l_paren) && !Right.Tok.is(tok::r_paren));
807 }
808
809 const UnwrappedLine &Line;
810 FormatStyle Style;
811 SourceManager &SourceMgr;
812 std::vector<TokenAnnotation> Annotations;
813};
814
Alexander Kornienko469a21b2012-12-07 16:15:44 +0000815class LexerBasedFormatTokenSource : public FormatTokenSource {
816public:
817 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
Daniel Jasper1321eb52012-12-18 21:05:13 +0000818 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
Alexander Kornienko469a21b2012-12-07 16:15:44 +0000819 IdentTable(Lex.getLangOpts()) {
820 Lex.SetKeepWhitespaceMode(true);
821 }
822
823 virtual FormatToken getNextToken() {
824 if (GreaterStashed) {
825 FormatTok.NewlinesBefore = 0;
826 FormatTok.WhiteSpaceStart =
827 FormatTok.Tok.getLocation().getLocWithOffset(1);
828 FormatTok.WhiteSpaceLength = 0;
829 GreaterStashed = false;
830 return FormatTok;
831 }
832
833 FormatTok = FormatToken();
834 Lex.LexFromRawLexer(FormatTok.Tok);
835 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
836
837 // Consume and record whitespace until we find a significant token.
838 while (FormatTok.Tok.is(tok::unknown)) {
839 FormatTok.NewlinesBefore += tokenText(FormatTok.Tok).count('\n');
840 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
841
842 if (FormatTok.Tok.is(tok::eof))
843 return FormatTok;
844 Lex.LexFromRawLexer(FormatTok.Tok);
845 }
846
847 if (FormatTok.Tok.is(tok::raw_identifier)) {
848 const IdentifierInfo &Info = IdentTable.get(tokenText(FormatTok.Tok));
849 FormatTok.Tok.setKind(Info.getTokenID());
850 }
851
852 if (FormatTok.Tok.is(tok::greatergreater)) {
853 FormatTok.Tok.setKind(tok::greater);
854 GreaterStashed = true;
855 }
856
857 return FormatTok;
858 }
859
860private:
861 FormatToken FormatTok;
862 bool GreaterStashed;
863 Lexer &Lex;
864 SourceManager &SourceMgr;
865 IdentifierTable IdentTable;
866
867 /// Returns the text of \c FormatTok.
868 StringRef tokenText(Token &Tok) {
869 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
870 Tok.getLength());
871 }
872};
873
Daniel Jasperbac016b2012-12-03 18:12:45 +0000874class Formatter : public UnwrappedLineConsumer {
875public:
876 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
877 const std::vector<CharSourceRange> &Ranges)
Daniel Jasper1321eb52012-12-18 21:05:13 +0000878 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), Ranges(Ranges),
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000879 StructuralError(false) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000880 }
881
Daniel Jasperaccb0b02012-12-04 21:05:31 +0000882 virtual ~Formatter() {
883 }
884
Daniel Jasperbac016b2012-12-03 18:12:45 +0000885 tooling::Replacements format() {
Alexander Kornienko469a21b2012-12-07 16:15:44 +0000886 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
887 UnwrappedLineParser Parser(Style, Tokens, *this);
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000888 StructuralError = Parser.parse();
889 for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(),
890 E = UnwrappedLines.end();
891 I != E; ++I)
Alexander Kornienko720ffb62012-12-05 13:56:52 +0000892 formatUnwrappedLine(*I);
Daniel Jasperbac016b2012-12-03 18:12:45 +0000893 return Replaces;
894 }
895
896private:
Alexander Kornienko720ffb62012-12-05 13:56:52 +0000897 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000898 UnwrappedLines.push_back(TheLine);
899 }
900
Alexander Kornienko720ffb62012-12-05 13:56:52 +0000901 void formatUnwrappedLine(const UnwrappedLine &TheLine) {
Daniel Jasperbac016b2012-12-03 18:12:45 +0000902 if (TheLine.Tokens.size() == 0)
903 return;
904
905 CharSourceRange LineRange =
906 CharSourceRange::getTokenRange(TheLine.Tokens.front().Tok.getLocation(),
907 TheLine.Tokens.back().Tok.getLocation());
908
909 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
910 if (SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(),
911 Ranges[i].getBegin()) ||
912 SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
913 LineRange.getBegin()))
914 continue;
915
916 TokenAnnotator Annotator(TheLine, Style, SourceMgr);
917 Annotator.annotate();
918 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine,
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000919 Annotator.getAnnotations(), Replaces,
920 StructuralError);
Daniel Jasperbac016b2012-12-03 18:12:45 +0000921 Formatter.format();
922 return;
923 }
924 }
925
926 FormatStyle Style;
927 Lexer &Lex;
928 SourceManager &SourceMgr;
929 tooling::Replacements Replaces;
930 std::vector<CharSourceRange> Ranges;
Alexander Kornienkocff563c2012-12-04 17:27:50 +0000931 std::vector<UnwrappedLine> UnwrappedLines;
932 bool StructuralError;
Daniel Jasperbac016b2012-12-03 18:12:45 +0000933};
934
935tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
936 SourceManager &SourceMgr,
937 std::vector<CharSourceRange> Ranges) {
938 Formatter formatter(Style, Lex, SourceMgr, Ranges);
939 return formatter.format();
940}
941
942} // namespace format
943} // namespace clang