blob: 1b1827e3f9a9977f95c713664455ead7a642efb5 [file] [log] [blame]
Alexander Kornienko70ce7882013-04-15 14:28:00 +00001//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Contains implementation of BreakableToken class and classes derived
12/// from it.
13///
14//===----------------------------------------------------------------------===//
15
Manuel Klimekde008c02013-05-27 15:23:34 +000016#define DEBUG_TYPE "format-token-breaker"
17
Alexander Kornienko70ce7882013-04-15 14:28:00 +000018#include "BreakableToken.h"
Manuel Klimekde008c02013-05-27 15:23:34 +000019#include "clang/Format/Format.h"
Alexander Kornienko919398b2013-04-17 17:34:05 +000020#include "llvm/ADT/STLExtras.h"
Manuel Klimekde008c02013-05-27 15:23:34 +000021#include "llvm/Support/Debug.h"
Alexander Kornienko70ce7882013-04-15 14:28:00 +000022#include <algorithm>
23
24namespace clang {
25namespace format {
Manuel Klimekde008c02013-05-27 15:23:34 +000026namespace {
Alexander Kornienko70ce7882013-04-15 14:28:00 +000027
Manuel Klimekde008c02013-05-27 15:23:34 +000028// FIXME: Move helper string functions to where it makes sense.
29
30unsigned getOctalLength(StringRef Text) {
31 unsigned I = 1;
32 while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
33 ++I;
34 }
35 return I;
36}
37
38unsigned getHexLength(StringRef Text) {
39 unsigned I = 2; // Point after '\x'.
40 while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
41 (Text[I] >= 'a' && Text[I] <= 'f') ||
42 (Text[I] >= 'A' && Text[I] <= 'F'))) {
43 ++I;
44 }
45 return I;
46}
47
48unsigned getEscapeSequenceLength(StringRef Text) {
49 assert(Text[0] == '\\');
50 if (Text.size() < 2)
51 return 1;
52
53 switch (Text[1]) {
54 case 'u':
55 return 6;
56 case 'U':
57 return 10;
58 case 'x':
59 return getHexLength(Text);
60 default:
61 if (Text[1] >= '0' && Text[1] <= '7')
62 return getOctalLength(Text);
63 return 2;
64 }
65}
66
67StringRef::size_type getStartOfCharacter(StringRef Text,
68 StringRef::size_type Offset) {
69 StringRef::size_type NextEscape = Text.find('\\');
70 while (NextEscape != StringRef::npos && NextEscape < Offset) {
71 StringRef::size_type SequenceLength =
72 getEscapeSequenceLength(Text.substr(NextEscape));
73 if (Offset < NextEscape + SequenceLength)
74 return NextEscape;
75 NextEscape = Text.find('\\', NextEscape + SequenceLength);
76 }
77 return Offset;
78}
79
80BreakableToken::Split getCommentSplit(StringRef Text,
81 unsigned ContentStartColumn,
82 unsigned ColumnLimit) {
Alexander Kornienko919398b2013-04-17 17:34:05 +000083 if (ColumnLimit <= ContentStartColumn + 1)
Manuel Klimekde008c02013-05-27 15:23:34 +000084 return BreakableToken::Split(StringRef::npos, 0);
Alexander Kornienko919398b2013-04-17 17:34:05 +000085
86 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
87 StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
88 if (SpaceOffset == StringRef::npos ||
Manuel Klimekde008c02013-05-27 15:23:34 +000089 // Don't break at leading whitespace.
90 Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos)
Alexander Kornienko919398b2013-04-17 17:34:05 +000091 SpaceOffset = Text.find(' ', MaxSplit);
Alexander Kornienko919398b2013-04-17 17:34:05 +000092 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
93 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
94 StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
95 return BreakableToken::Split(BeforeCut.size(),
96 AfterCut.begin() - BeforeCut.end());
97 }
98 return BreakableToken::Split(StringRef::npos, 0);
99}
100
Manuel Klimekde008c02013-05-27 15:23:34 +0000101BreakableToken::Split getStringSplit(StringRef Text,
102 unsigned ContentStartColumn,
103 unsigned ColumnLimit) {
Alexander Kornienko919398b2013-04-17 17:34:05 +0000104
Manuel Klimekde008c02013-05-27 15:23:34 +0000105 if (ColumnLimit <= ContentStartColumn)
106 return BreakableToken::Split(StringRef::npos, 0);
107 unsigned MaxSplit = ColumnLimit - ContentStartColumn;
108 // FIXME: Reduce unit test case.
109 if (Text.empty())
110 return BreakableToken::Split(StringRef::npos, 0);
111 MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
112 StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
113 if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
114 return BreakableToken::Split(SpaceOffset + 1, 0);
115 StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
116 if (SlashOffset != StringRef::npos && SlashOffset != 0)
117 return BreakableToken::Split(SlashOffset + 1, 0);
118 StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
119 if (SplitPoint == StringRef::npos || SplitPoint == 0)
120 return BreakableToken::Split(StringRef::npos, 0);
121 return BreakableToken::Split(SplitPoint, 0);
Alexander Kornienko919398b2013-04-17 17:34:05 +0000122}
123
Manuel Klimekde008c02013-05-27 15:23:34 +0000124} // namespace
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000125
Manuel Klimekde008c02013-05-27 15:23:34 +0000126unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000127
Manuel Klimekde008c02013-05-27 15:23:34 +0000128unsigned
129BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
130 unsigned TailOffset) const {
131 return StartColumn + Prefix.size() + Postfix.size() + Line.size() -
132 TailOffset;
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000133}
134
Manuel Klimekde008c02013-05-27 15:23:34 +0000135void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
136 unsigned TailOffset, Split Split,
137 bool InPPDirective,
138 WhitespaceManager &Whitespaces) {
139 Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
140 Split.second, Postfix, Prefix, InPPDirective,
141 StartColumn);
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000142}
143
Manuel Klimekde008c02013-05-27 15:23:34 +0000144BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
145 unsigned StartColumn,
146 StringRef Prefix,
147 StringRef Postfix)
148 : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
149 Postfix(Postfix) {
150 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
151 Line = Tok.TokenText.substr(
152 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000153}
154
Manuel Klimekde008c02013-05-27 15:23:34 +0000155BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
156 unsigned StartColumn)
157 : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
158
159BreakableToken::Split
160BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
161 unsigned ColumnLimit) const {
162 return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
Alexander Kornienko919398b2013-04-17 17:34:05 +0000163}
164
Manuel Klimekde008c02013-05-27 15:23:34 +0000165static StringRef getLineCommentPrefix(StringRef Comment) {
Alexander Kornienko919398b2013-04-17 17:34:05 +0000166 const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
Manuel Klimekde008c02013-05-27 15:23:34 +0000167 for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
Alexander Kornienko919398b2013-04-17 17:34:05 +0000168 if (Comment.startswith(KnownPrefixes[i]))
169 return KnownPrefixes[i];
170 return "";
171}
172
Manuel Klimekde008c02013-05-27 15:23:34 +0000173BreakableLineComment::BreakableLineComment(const FormatToken &Token,
174 unsigned StartColumn)
175 : BreakableSingleLineToken(Token, StartColumn,
176 getLineCommentPrefix(Token.TokenText), "") {}
177
178BreakableToken::Split
179BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
180 unsigned ColumnLimit) const {
181 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
182 ColumnLimit);
183}
184
185BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
186 const FormatToken &Token,
187 unsigned StartColumn,
188 unsigned OriginalStartColumn,
189 bool FirstInLine)
190 : BreakableToken(Token) {
191 StringRef TokenText(Token.TokenText);
192 assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
193 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
194
195 int IndentDelta = StartColumn - OriginalStartColumn;
196 bool NeedsStar = true;
197 LeadingWhitespace.resize(Lines.size());
198 StartOfLineColumn.resize(Lines.size());
199 if (Lines.size() == 1 && !FirstInLine) {
200 // Comments for which FirstInLine is false can start on arbitrary column,
201 // and available horizontal space can be too small to align consecutive
202 // lines with the first one.
203 // FIXME: We could, probably, align them to current indentation level, but
204 // now we just wrap them without stars.
205 NeedsStar = false;
206 }
207 StartOfLineColumn[0] = StartColumn + 2;
208 for (size_t i = 1; i < Lines.size(); ++i) {
209 adjustWhitespace(Style, i, IndentDelta);
210 if (Lines[i].empty())
211 // If the last line is empty, the closing "*/" will have a star.
212 NeedsStar = NeedsStar && i + 1 == Lines.size();
213 else
214 NeedsStar = NeedsStar && Lines[i][0] == '*';
215 }
216 Decoration = NeedsStar ? "* " : "";
217 IndentAtLineBreak = StartOfLineColumn[0] + 1;
218 for (size_t i = 1; i < Lines.size(); ++i) {
219 if (Lines[i].empty()) {
220 if (!NeedsStar && i + 1 != Lines.size())
221 // For all but the last line (which always ends in */), set the
222 // start column to 0 if they're empty, so we do not insert
223 // trailing whitespace anywhere.
224 StartOfLineColumn[i] = 0;
225 continue;
226 }
227 if (NeedsStar) {
228 // The first line already excludes the star.
229 // For all other lines, adjust the line to exclude the star and
230 // (optionally) the first whitespace.
231 int Offset = Lines[i].startswith("* ") ? 2 : 1;
232 StartOfLineColumn[i] += Offset;
233 Lines[i] = Lines[i].substr(Offset);
234 LeadingWhitespace[i] += Offset;
235 }
236 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
237 }
238 DEBUG({
239 for (size_t i = 0; i < Lines.size(); ++i) {
240 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
241 << "\n";
242 }
243 });
244}
245
246void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
247 unsigned LineIndex,
248 int IndentDelta) {
249 // Calculate the end of the non-whitespace text in the previous line.
250 size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
251 if (EndOfPreviousLine == StringRef::npos)
252 EndOfPreviousLine = 0;
253 else
254 ++EndOfPreviousLine;
255 // Calculate the start of the non-whitespace text in the current line.
256 size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
257 if (StartOfLine == StringRef::npos)
258 StartOfLine = Lines[LineIndex].size();
259 // FIXME: Tabs are not always 8 characters. Make configurable in the style.
260 unsigned Column = 0;
261 StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine);
262 for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) {
263 if (Lines[LineIndex][i] == '\t')
264 Column += 8 - (Column % 8);
265 else
266 ++Column;
267 }
268
269 // Adjust Lines to only contain relevant text.
270 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
271 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
272 // Adjust LeadingWhitespace to account all whitespace between the lines
273 // to the current line.
274 LeadingWhitespace[LineIndex] =
275 Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
276 // Adjust the start column uniformly accross all lines.
277 StartOfLineColumn[LineIndex] = std::max<int>(0, Column + IndentDelta);
278}
279
280unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
281
282unsigned
283BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
284 unsigned TailOffset) const {
285 return getContentStartColumn(LineIndex, TailOffset) +
286 (Lines[LineIndex].size() - TailOffset) +
287 // The last line gets a "*/" postfix.
288 (LineIndex + 1 == Lines.size() ? 2 : 0);
289}
290
291BreakableToken::Split
292BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
293 unsigned ColumnLimit) const {
294 return getCommentSplit(Lines[LineIndex].substr(TailOffset),
295 getContentStartColumn(LineIndex, TailOffset),
296 ColumnLimit);
297}
298
299void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
300 Split Split, bool InPPDirective,
301 WhitespaceManager &Whitespaces) {
302 StringRef Text = Lines[LineIndex].substr(TailOffset);
303 StringRef Prefix = Decoration;
304 if (LineIndex + 1 == Lines.size() &&
305 Text.size() == Split.first + Split.second) {
306 // For the last line we need to break before "*/", but not to add "* ".
307 Prefix = "";
308 }
309
310 unsigned BreakOffsetInToken =
311 Text.data() - Tok.TokenText.data() + Split.first;
312 unsigned CharsToRemove = Split.second;
313 Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
314 InPPDirective, IndentAtLineBreak - Decoration.size());
315}
316
317void
318BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
319 unsigned InPPDirective,
320 WhitespaceManager &Whitespaces) {
321 if (LineIndex == 0)
322 return;
323 StringRef Prefix = Decoration;
324 if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty())
325 Prefix = "";
326
327 unsigned WhitespaceOffsetInToken =
328 Lines[LineIndex].data() - Tok.TokenText.data() -
329 LeadingWhitespace[LineIndex];
330 Whitespaces.breakToken(
331 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
332 InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
333}
334
335unsigned
336BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
337 unsigned TailOffset) const {
338 // If we break, we always break at the predefined indent.
339 if (TailOffset != 0)
340 return IndentAtLineBreak;
341 return StartOfLineColumn[LineIndex];
342}
343
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000344} // namespace format
345} // namespace clang