blob: 8672ece9b798a3c27ede911185154f76bd800bb4 [file] [log] [blame]
//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Contains implementation of BreakableToken class and classes derived
/// from it.
///
//===----------------------------------------------------------------------===//
15
Manuel Klimekde008c02013-05-27 15:23:34 +000016#define DEBUG_TYPE "format-token-breaker"
17
Alexander Kornienko70ce7882013-04-15 14:28:00 +000018#include "BreakableToken.h"
Manuel Klimekde008c02013-05-27 15:23:34 +000019#include "clang/Format/Format.h"
Alexander Kornienko919398b2013-04-17 17:34:05 +000020#include "llvm/ADT/STLExtras.h"
Manuel Klimekde008c02013-05-27 15:23:34 +000021#include "llvm/Support/Debug.h"
Alexander Kornienko70ce7882013-04-15 14:28:00 +000022#include <algorithm>
23
24namespace clang {
25namespace format {
Manuel Klimekde008c02013-05-27 15:23:34 +000026namespace {
Alexander Kornienko70ce7882013-04-15 14:28:00 +000027
// FIXME: Move helper string functions to where it makes sense.
29
30unsigned getOctalLength(StringRef Text) {
31 unsigned I = 1;
32 while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
33 ++I;
34 }
35 return I;
36}
37
38unsigned getHexLength(StringRef Text) {
39 unsigned I = 2; // Point after '\x'.
40 while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
41 (Text[I] >= 'a' && Text[I] <= 'f') ||
42 (Text[I] >= 'A' && Text[I] <= 'F'))) {
43 ++I;
44 }
45 return I;
46}
47
48unsigned getEscapeSequenceLength(StringRef Text) {
49 assert(Text[0] == '\\');
50 if (Text.size() < 2)
51 return 1;
52
53 switch (Text[1]) {
54 case 'u':
55 return 6;
56 case 'U':
57 return 10;
58 case 'x':
59 return getHexLength(Text);
60 default:
61 if (Text[1] >= '0' && Text[1] <= '7')
62 return getOctalLength(Text);
63 return 2;
64 }
65}
66
67StringRef::size_type getStartOfCharacter(StringRef Text,
68 StringRef::size_type Offset) {
69 StringRef::size_type NextEscape = Text.find('\\');
70 while (NextEscape != StringRef::npos && NextEscape < Offset) {
71 StringRef::size_type SequenceLength =
72 getEscapeSequenceLength(Text.substr(NextEscape));
73 if (Offset < NextEscape + SequenceLength)
74 return NextEscape;
75 NextEscape = Text.find('\\', NextEscape + SequenceLength);
76 }
77 return Offset;
78}
79
80BreakableToken::Split getCommentSplit(StringRef Text,
81 unsigned ContentStartColumn,
82 unsigned ColumnLimit) {
Alexander Kornienko919398b2013-04-17 17:34:05 +000083 if (ColumnLimit <= ContentStartColumn + 1)
Manuel Klimekde008c02013-05-27 15:23:34 +000084 return BreakableToken::Split(StringRef::npos, 0);
Alexander Kornienko919398b2013-04-17 17:34:05 +000085
86 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
87 StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
88 if (SpaceOffset == StringRef::npos ||
Manuel Klimekde008c02013-05-27 15:23:34 +000089 // Don't break at leading whitespace.
Manuel Klimekbe9ed772013-05-29 22:06:18 +000090 Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
91 // Make sure that we don't break at leading whitespace that
92 // reaches past MaxSplit.
93 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(" ");
94 if (FirstNonWhitespace == StringRef::npos)
95 // If the comment is only whitespace, we cannot split.
96 return BreakableToken::Split(StringRef::npos, 0);
97 SpaceOffset =
98 Text.find(' ', std::max<unsigned>(MaxSplit, FirstNonWhitespace));
99 }
Alexander Kornienko919398b2013-04-17 17:34:05 +0000100 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
101 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
102 StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
103 return BreakableToken::Split(BeforeCut.size(),
104 AfterCut.begin() - BeforeCut.end());
105 }
106 return BreakableToken::Split(StringRef::npos, 0);
107}
108
Manuel Klimekde008c02013-05-27 15:23:34 +0000109BreakableToken::Split getStringSplit(StringRef Text,
110 unsigned ContentStartColumn,
111 unsigned ColumnLimit) {
Alexander Kornienko919398b2013-04-17 17:34:05 +0000112
Manuel Klimekde008c02013-05-27 15:23:34 +0000113 if (ColumnLimit <= ContentStartColumn)
114 return BreakableToken::Split(StringRef::npos, 0);
115 unsigned MaxSplit = ColumnLimit - ContentStartColumn;
116 // FIXME: Reduce unit test case.
117 if (Text.empty())
118 return BreakableToken::Split(StringRef::npos, 0);
119 MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
120 StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
121 if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
122 return BreakableToken::Split(SpaceOffset + 1, 0);
123 StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
124 if (SlashOffset != StringRef::npos && SlashOffset != 0)
125 return BreakableToken::Split(SlashOffset + 1, 0);
126 StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
127 if (SplitPoint == StringRef::npos || SplitPoint == 0)
128 return BreakableToken::Split(StringRef::npos, 0);
129 return BreakableToken::Split(SplitPoint, 0);
Alexander Kornienko919398b2013-04-17 17:34:05 +0000130}
131
Manuel Klimekde008c02013-05-27 15:23:34 +0000132} // namespace
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000133
Manuel Klimekde008c02013-05-27 15:23:34 +0000134unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000135
Manuel Klimekde008c02013-05-27 15:23:34 +0000136unsigned
137BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
138 unsigned TailOffset) const {
139 return StartColumn + Prefix.size() + Postfix.size() + Line.size() -
140 TailOffset;
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000141}
142
Manuel Klimekde008c02013-05-27 15:23:34 +0000143void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
144 unsigned TailOffset, Split Split,
145 bool InPPDirective,
146 WhitespaceManager &Whitespaces) {
147 Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
148 Split.second, Postfix, Prefix, InPPDirective,
149 StartColumn);
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000150}
151
Manuel Klimekde008c02013-05-27 15:23:34 +0000152BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
153 unsigned StartColumn,
154 StringRef Prefix,
155 StringRef Postfix)
156 : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
157 Postfix(Postfix) {
158 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
159 Line = Tok.TokenText.substr(
160 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000161}
162
Manuel Klimekde008c02013-05-27 15:23:34 +0000163BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
164 unsigned StartColumn)
165 : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
166
167BreakableToken::Split
168BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
169 unsigned ColumnLimit) const {
170 return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
Alexander Kornienko919398b2013-04-17 17:34:05 +0000171}
172
Manuel Klimekde008c02013-05-27 15:23:34 +0000173static StringRef getLineCommentPrefix(StringRef Comment) {
Alexander Kornienko919398b2013-04-17 17:34:05 +0000174 const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
Manuel Klimekde008c02013-05-27 15:23:34 +0000175 for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
Alexander Kornienko919398b2013-04-17 17:34:05 +0000176 if (Comment.startswith(KnownPrefixes[i]))
177 return KnownPrefixes[i];
178 return "";
179}
180
Manuel Klimekde008c02013-05-27 15:23:34 +0000181BreakableLineComment::BreakableLineComment(const FormatToken &Token,
182 unsigned StartColumn)
183 : BreakableSingleLineToken(Token, StartColumn,
184 getLineCommentPrefix(Token.TokenText), "") {}
185
186BreakableToken::Split
187BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
188 unsigned ColumnLimit) const {
189 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
190 ColumnLimit);
191}
192
193BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
194 const FormatToken &Token,
195 unsigned StartColumn,
196 unsigned OriginalStartColumn,
197 bool FirstInLine)
198 : BreakableToken(Token) {
199 StringRef TokenText(Token.TokenText);
200 assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
201 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
202
203 int IndentDelta = StartColumn - OriginalStartColumn;
204 bool NeedsStar = true;
205 LeadingWhitespace.resize(Lines.size());
206 StartOfLineColumn.resize(Lines.size());
207 if (Lines.size() == 1 && !FirstInLine) {
208 // Comments for which FirstInLine is false can start on arbitrary column,
209 // and available horizontal space can be too small to align consecutive
210 // lines with the first one.
211 // FIXME: We could, probably, align them to current indentation level, but
212 // now we just wrap them without stars.
213 NeedsStar = false;
214 }
215 StartOfLineColumn[0] = StartColumn + 2;
216 for (size_t i = 1; i < Lines.size(); ++i) {
217 adjustWhitespace(Style, i, IndentDelta);
218 if (Lines[i].empty())
219 // If the last line is empty, the closing "*/" will have a star.
220 NeedsStar = NeedsStar && i + 1 == Lines.size();
221 else
222 NeedsStar = NeedsStar && Lines[i][0] == '*';
223 }
224 Decoration = NeedsStar ? "* " : "";
225 IndentAtLineBreak = StartOfLineColumn[0] + 1;
226 for (size_t i = 1; i < Lines.size(); ++i) {
227 if (Lines[i].empty()) {
228 if (!NeedsStar && i + 1 != Lines.size())
229 // For all but the last line (which always ends in */), set the
230 // start column to 0 if they're empty, so we do not insert
231 // trailing whitespace anywhere.
232 StartOfLineColumn[i] = 0;
233 continue;
234 }
235 if (NeedsStar) {
236 // The first line already excludes the star.
237 // For all other lines, adjust the line to exclude the star and
238 // (optionally) the first whitespace.
239 int Offset = Lines[i].startswith("* ") ? 2 : 1;
240 StartOfLineColumn[i] += Offset;
241 Lines[i] = Lines[i].substr(Offset);
242 LeadingWhitespace[i] += Offset;
243 }
Daniel Jasper92092702013-05-30 15:20:29 +0000244 // Exclude empty lines from the calculation of the left-most column.
245 if (Lines[i].empty())
246 continue;
Manuel Klimekde008c02013-05-27 15:23:34 +0000247 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
248 }
249 DEBUG({
250 for (size_t i = 0; i < Lines.size(); ++i) {
251 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
252 << "\n";
253 }
254 });
255}
256
257void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
258 unsigned LineIndex,
259 int IndentDelta) {
260 // Calculate the end of the non-whitespace text in the previous line.
261 size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
262 if (EndOfPreviousLine == StringRef::npos)
263 EndOfPreviousLine = 0;
264 else
265 ++EndOfPreviousLine;
266 // Calculate the start of the non-whitespace text in the current line.
267 size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
268 if (StartOfLine == StringRef::npos)
269 StartOfLine = Lines[LineIndex].size();
Manuel Klimekde008c02013-05-27 15:23:34 +0000270
271 // Adjust Lines to only contain relevant text.
272 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
273 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
274 // Adjust LeadingWhitespace to account all whitespace between the lines
275 // to the current line.
276 LeadingWhitespace[LineIndex] =
277 Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
Manuel Klimekd63312b2013-05-28 10:01:59 +0000278
279 // FIXME: We currently count tabs as 1 character. To solve this, we need to
280 // get the correct indentation width of the start of the comment, which
281 // requires correct counting of the tab expansions before the comment, and
282 // a configurable tab width. Since the current implementation only breaks
283 // if leading tabs are intermixed with spaces, that is not a high priority.
284
Manuel Klimekde008c02013-05-27 15:23:34 +0000285 // Adjust the start column uniformly accross all lines.
Manuel Klimekd63312b2013-05-28 10:01:59 +0000286 StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
Manuel Klimekde008c02013-05-27 15:23:34 +0000287}
288
289unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
290
291unsigned
292BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
293 unsigned TailOffset) const {
294 return getContentStartColumn(LineIndex, TailOffset) +
295 (Lines[LineIndex].size() - TailOffset) +
296 // The last line gets a "*/" postfix.
297 (LineIndex + 1 == Lines.size() ? 2 : 0);
298}
299
300BreakableToken::Split
301BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
302 unsigned ColumnLimit) const {
303 return getCommentSplit(Lines[LineIndex].substr(TailOffset),
304 getContentStartColumn(LineIndex, TailOffset),
305 ColumnLimit);
306}
307
308void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
309 Split Split, bool InPPDirective,
310 WhitespaceManager &Whitespaces) {
311 StringRef Text = Lines[LineIndex].substr(TailOffset);
312 StringRef Prefix = Decoration;
313 if (LineIndex + 1 == Lines.size() &&
314 Text.size() == Split.first + Split.second) {
315 // For the last line we need to break before "*/", but not to add "* ".
316 Prefix = "";
317 }
318
319 unsigned BreakOffsetInToken =
320 Text.data() - Tok.TokenText.data() + Split.first;
321 unsigned CharsToRemove = Split.second;
Manuel Klimekb6dba332013-05-30 07:45:53 +0000322 assert(IndentAtLineBreak >= Decoration.size());
Manuel Klimekde008c02013-05-27 15:23:34 +0000323 Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
324 InPPDirective, IndentAtLineBreak - Decoration.size());
325}
326
327void
328BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
329 unsigned InPPDirective,
330 WhitespaceManager &Whitespaces) {
331 if (LineIndex == 0)
332 return;
333 StringRef Prefix = Decoration;
Manuel Klimekc5cc4bf2013-05-28 08:55:01 +0000334 if (Lines[LineIndex].empty()) {
335 if (LineIndex + 1 == Lines.size()) {
336 // If the last line is empty, we don't need a prefix, as the */ will line
337 // up with the decoration (if it exists).
338 Prefix = "";
339 } else if (!Decoration.empty()) {
340 // For other empty lines, if we do have a decoration, adapt it to not
341 // contain a trailing whitespace.
342 Prefix = Prefix.substr(0, 1);
343 }
Daniel Jaspere2c482f2013-05-30 06:40:07 +0000344 } else {
345 if (StartOfLineColumn[LineIndex] == 1) {
346 // This lines starts immediately after the decorating *.
347 Prefix = Prefix.substr(0, 1);
348 }
Manuel Klimekc5cc4bf2013-05-28 08:55:01 +0000349 }
Manuel Klimekde008c02013-05-27 15:23:34 +0000350
351 unsigned WhitespaceOffsetInToken =
352 Lines[LineIndex].data() - Tok.TokenText.data() -
353 LeadingWhitespace[LineIndex];
Manuel Klimekb6dba332013-05-30 07:45:53 +0000354 assert(StartOfLineColumn[LineIndex] >= Prefix.size());
Manuel Klimekde008c02013-05-27 15:23:34 +0000355 Whitespaces.breakToken(
356 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
357 InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
358}
359
360unsigned
361BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
362 unsigned TailOffset) const {
363 // If we break, we always break at the predefined indent.
364 if (TailOffset != 0)
365 return IndentAtLineBreak;
366 return StartOfLineColumn[LineIndex];
367}
368
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000369} // namespace format
370} // namespace clang