blob: 94b4322e7e0b5b388da34cb821d2c241f9327dca [file] [log] [blame]
//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Contains implementation of BreakableToken class and classes derived
/// from it.
///
//===----------------------------------------------------------------------===//
15
Manuel Klimekde008c02013-05-27 15:23:34 +000016#define DEBUG_TYPE "format-token-breaker"
17
Alexander Kornienko70ce7882013-04-15 14:28:00 +000018#include "BreakableToken.h"
Manuel Klimekde008c02013-05-27 15:23:34 +000019#include "clang/Format/Format.h"
Alexander Kornienko919398b2013-04-17 17:34:05 +000020#include "llvm/ADT/STLExtras.h"
Manuel Klimekde008c02013-05-27 15:23:34 +000021#include "llvm/Support/Debug.h"
Alexander Kornienko70ce7882013-04-15 14:28:00 +000022#include <algorithm>
23
24namespace clang {
25namespace format {
Manuel Klimekde008c02013-05-27 15:23:34 +000026namespace {
Alexander Kornienko70ce7882013-04-15 14:28:00 +000027
Manuel Klimekde008c02013-05-27 15:23:34 +000028BreakableToken::Split getCommentSplit(StringRef Text,
29 unsigned ContentStartColumn,
Alexander Kornienko00895102013-06-05 14:09:10 +000030 unsigned ColumnLimit,
31 encoding::Encoding Encoding) {
Alexander Kornienko919398b2013-04-17 17:34:05 +000032 if (ColumnLimit <= ContentStartColumn + 1)
Manuel Klimekde008c02013-05-27 15:23:34 +000033 return BreakableToken::Split(StringRef::npos, 0);
Alexander Kornienko919398b2013-04-17 17:34:05 +000034
35 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
Alexander Kornienko00895102013-06-05 14:09:10 +000036 unsigned MaxSplitBytes = 0;
37
38 for (unsigned NumChars = 0;
39 NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
40 MaxSplitBytes +=
41 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
42
43 StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplitBytes);
Alexander Kornienko919398b2013-04-17 17:34:05 +000044 if (SpaceOffset == StringRef::npos ||
Manuel Klimekde008c02013-05-27 15:23:34 +000045 // Don't break at leading whitespace.
Manuel Klimekbe9ed772013-05-29 22:06:18 +000046 Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
47 // Make sure that we don't break at leading whitespace that
48 // reaches past MaxSplit.
49 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(" ");
50 if (FirstNonWhitespace == StringRef::npos)
51 // If the comment is only whitespace, we cannot split.
52 return BreakableToken::Split(StringRef::npos, 0);
53 SpaceOffset =
Alexander Kornienko00895102013-06-05 14:09:10 +000054 Text.find(' ', std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
Manuel Klimekbe9ed772013-05-29 22:06:18 +000055 }
Alexander Kornienko919398b2013-04-17 17:34:05 +000056 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
57 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
58 StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
59 return BreakableToken::Split(BeforeCut.size(),
60 AfterCut.begin() - BeforeCut.end());
61 }
62 return BreakableToken::Split(StringRef::npos, 0);
63}
64
Manuel Klimekde008c02013-05-27 15:23:34 +000065BreakableToken::Split getStringSplit(StringRef Text,
66 unsigned ContentStartColumn,
Alexander Kornienko00895102013-06-05 14:09:10 +000067 unsigned ColumnLimit,
68 encoding::Encoding Encoding) {
Manuel Klimekde008c02013-05-27 15:23:34 +000069 // FIXME: Reduce unit test case.
70 if (Text.empty())
71 return BreakableToken::Split(StringRef::npos, 0);
Alexander Kornienko00895102013-06-05 14:09:10 +000072 if (ColumnLimit <= ContentStartColumn)
Manuel Klimekde008c02013-05-27 15:23:34 +000073 return BreakableToken::Split(StringRef::npos, 0);
Alexander Kornienko00895102013-06-05 14:09:10 +000074 unsigned MaxSplit =
75 std::min<unsigned>(ColumnLimit - ContentStartColumn,
76 encoding::getCodePointCount(Text, Encoding) - 1);
77 StringRef::size_type SpaceOffset = 0;
78 StringRef::size_type SlashOffset = 0;
79 StringRef::size_type SplitPoint = 0;
80 for (unsigned Chars = 0;;) {
81 unsigned Advance;
82 if (Text[0] == '\\') {
83 Advance = encoding::getEscapeSequenceLength(Text);
84 Chars += Advance;
85 } else {
86 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
87 Chars += 1;
88 }
89
90 if (Chars > MaxSplit)
91 break;
92
93 if (Text[0] == ' ')
94 SpaceOffset = SplitPoint;
95 if (Text[0] == '/')
96 SlashOffset = SplitPoint;
97
98 SplitPoint += Advance;
99 Text = Text.substr(Advance);
100 }
101
102 if (SpaceOffset != 0)
103 return BreakableToken::Split(SpaceOffset + 1, 0);
104 if (SlashOffset != 0)
105 return BreakableToken::Split(SlashOffset + 1, 0);
106 if (SplitPoint != 0)
107 return BreakableToken::Split(SplitPoint, 0);
108 return BreakableToken::Split(StringRef::npos, 0);
Alexander Kornienko919398b2013-04-17 17:34:05 +0000109}
110
Manuel Klimekde008c02013-05-27 15:23:34 +0000111} // namespace
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000112
Manuel Klimekde008c02013-05-27 15:23:34 +0000113unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000114
Alexander Kornienko2785b9a2013-06-07 16:02:52 +0000115unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
116 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
Alexander Kornienko00895102013-06-05 14:09:10 +0000117 return StartColumn + Prefix.size() + Postfix.size() +
Alexander Kornienko2785b9a2013-06-07 16:02:52 +0000118 encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000119}
120
Manuel Klimekde008c02013-05-27 15:23:34 +0000121void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
122 unsigned TailOffset, Split Split,
123 bool InPPDirective,
124 WhitespaceManager &Whitespaces) {
125 Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
126 Split.second, Postfix, Prefix, InPPDirective,
127 StartColumn);
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000128}
129
Manuel Klimekde008c02013-05-27 15:23:34 +0000130BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
131 unsigned StartColumn,
132 StringRef Prefix,
Alexander Kornienko00895102013-06-05 14:09:10 +0000133 StringRef Postfix,
134 encoding::Encoding Encoding)
135 : BreakableToken(Tok, Encoding), StartColumn(StartColumn), Prefix(Prefix),
Manuel Klimekde008c02013-05-27 15:23:34 +0000136 Postfix(Postfix) {
137 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
138 Line = Tok.TokenText.substr(
139 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000140}
141
Manuel Klimekde008c02013-05-27 15:23:34 +0000142BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
Alexander Kornienko00895102013-06-05 14:09:10 +0000143 unsigned StartColumn,
144 encoding::Encoding Encoding)
145 : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", Encoding) {}
Manuel Klimekde008c02013-05-27 15:23:34 +0000146
147BreakableToken::Split
148BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
149 unsigned ColumnLimit) const {
Alexander Kornienko00895102013-06-05 14:09:10 +0000150 return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
151 Encoding);
Alexander Kornienko919398b2013-04-17 17:34:05 +0000152}
153
Manuel Klimekde008c02013-05-27 15:23:34 +0000154static StringRef getLineCommentPrefix(StringRef Comment) {
Alexander Kornienko919398b2013-04-17 17:34:05 +0000155 const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
Manuel Klimekde008c02013-05-27 15:23:34 +0000156 for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
Alexander Kornienko919398b2013-04-17 17:34:05 +0000157 if (Comment.startswith(KnownPrefixes[i]))
158 return KnownPrefixes[i];
159 return "";
160}
161
Manuel Klimekde008c02013-05-27 15:23:34 +0000162BreakableLineComment::BreakableLineComment(const FormatToken &Token,
Alexander Kornienko00895102013-06-05 14:09:10 +0000163 unsigned StartColumn,
164 encoding::Encoding Encoding)
Manuel Klimekde008c02013-05-27 15:23:34 +0000165 : BreakableSingleLineToken(Token, StartColumn,
Alexander Kornienko00895102013-06-05 14:09:10 +0000166 getLineCommentPrefix(Token.TokenText), "",
167 Encoding) {}
Manuel Klimekde008c02013-05-27 15:23:34 +0000168
169BreakableToken::Split
170BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
171 unsigned ColumnLimit) const {
172 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
Alexander Kornienko00895102013-06-05 14:09:10 +0000173 ColumnLimit, Encoding);
Manuel Klimekde008c02013-05-27 15:23:34 +0000174}
175
Alexander Kornienko00895102013-06-05 14:09:10 +0000176BreakableBlockComment::BreakableBlockComment(
177 const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
178 unsigned OriginalStartColumn, bool FirstInLine, encoding::Encoding Encoding)
179 : BreakableToken(Token, Encoding) {
Manuel Klimekde008c02013-05-27 15:23:34 +0000180 StringRef TokenText(Token.TokenText);
181 assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
182 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
183
184 int IndentDelta = StartColumn - OriginalStartColumn;
185 bool NeedsStar = true;
186 LeadingWhitespace.resize(Lines.size());
187 StartOfLineColumn.resize(Lines.size());
188 if (Lines.size() == 1 && !FirstInLine) {
189 // Comments for which FirstInLine is false can start on arbitrary column,
190 // and available horizontal space can be too small to align consecutive
191 // lines with the first one.
192 // FIXME: We could, probably, align them to current indentation level, but
193 // now we just wrap them without stars.
194 NeedsStar = false;
195 }
196 StartOfLineColumn[0] = StartColumn + 2;
197 for (size_t i = 1; i < Lines.size(); ++i) {
198 adjustWhitespace(Style, i, IndentDelta);
199 if (Lines[i].empty())
200 // If the last line is empty, the closing "*/" will have a star.
201 NeedsStar = NeedsStar && i + 1 == Lines.size();
202 else
203 NeedsStar = NeedsStar && Lines[i][0] == '*';
204 }
205 Decoration = NeedsStar ? "* " : "";
206 IndentAtLineBreak = StartOfLineColumn[0] + 1;
207 for (size_t i = 1; i < Lines.size(); ++i) {
208 if (Lines[i].empty()) {
209 if (!NeedsStar && i + 1 != Lines.size())
210 // For all but the last line (which always ends in */), set the
211 // start column to 0 if they're empty, so we do not insert
212 // trailing whitespace anywhere.
213 StartOfLineColumn[i] = 0;
214 continue;
215 }
216 if (NeedsStar) {
217 // The first line already excludes the star.
218 // For all other lines, adjust the line to exclude the star and
219 // (optionally) the first whitespace.
220 int Offset = Lines[i].startswith("* ") ? 2 : 1;
221 StartOfLineColumn[i] += Offset;
222 Lines[i] = Lines[i].substr(Offset);
223 LeadingWhitespace[i] += Offset;
224 }
225 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
226 }
Daniel Jaspercb4b40b2013-05-30 17:27:48 +0000227 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
Manuel Klimekde008c02013-05-27 15:23:34 +0000228 DEBUG({
229 for (size_t i = 0; i < Lines.size(); ++i) {
230 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
231 << "\n";
232 }
233 });
234}
235
236void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
237 unsigned LineIndex,
238 int IndentDelta) {
239 // Calculate the end of the non-whitespace text in the previous line.
240 size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
241 if (EndOfPreviousLine == StringRef::npos)
242 EndOfPreviousLine = 0;
243 else
244 ++EndOfPreviousLine;
245 // Calculate the start of the non-whitespace text in the current line.
246 size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
247 if (StartOfLine == StringRef::npos)
248 StartOfLine = Lines[LineIndex].size();
Manuel Klimekde008c02013-05-27 15:23:34 +0000249
250 // Adjust Lines to only contain relevant text.
251 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
252 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
253 // Adjust LeadingWhitespace to account all whitespace between the lines
254 // to the current line.
255 LeadingWhitespace[LineIndex] =
256 Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
Manuel Klimekd63312b2013-05-28 10:01:59 +0000257
258 // FIXME: We currently count tabs as 1 character. To solve this, we need to
259 // get the correct indentation width of the start of the comment, which
260 // requires correct counting of the tab expansions before the comment, and
261 // a configurable tab width. Since the current implementation only breaks
262 // if leading tabs are intermixed with spaces, that is not a high priority.
263
Manuel Klimekde008c02013-05-27 15:23:34 +0000264 // Adjust the start column uniformly accross all lines.
Manuel Klimekd63312b2013-05-28 10:01:59 +0000265 StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
Manuel Klimekde008c02013-05-27 15:23:34 +0000266}
267
268unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
269
Alexander Kornienko2785b9a2013-06-07 16:02:52 +0000270unsigned BreakableBlockComment::getLineLengthAfterSplit(
271 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
272 return getContentStartColumn(LineIndex, Offset) +
273 encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
Alexander Kornienko00895102013-06-05 14:09:10 +0000274 Encoding) +
Manuel Klimekde008c02013-05-27 15:23:34 +0000275 // The last line gets a "*/" postfix.
276 (LineIndex + 1 == Lines.size() ? 2 : 0);
277}
278
279BreakableToken::Split
280BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
281 unsigned ColumnLimit) const {
282 return getCommentSplit(Lines[LineIndex].substr(TailOffset),
283 getContentStartColumn(LineIndex, TailOffset),
Alexander Kornienko00895102013-06-05 14:09:10 +0000284 ColumnLimit, Encoding);
Manuel Klimekde008c02013-05-27 15:23:34 +0000285}
286
287void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
288 Split Split, bool InPPDirective,
289 WhitespaceManager &Whitespaces) {
290 StringRef Text = Lines[LineIndex].substr(TailOffset);
291 StringRef Prefix = Decoration;
292 if (LineIndex + 1 == Lines.size() &&
293 Text.size() == Split.first + Split.second) {
294 // For the last line we need to break before "*/", but not to add "* ".
295 Prefix = "";
296 }
297
298 unsigned BreakOffsetInToken =
299 Text.data() - Tok.TokenText.data() + Split.first;
300 unsigned CharsToRemove = Split.second;
Manuel Klimekb6dba332013-05-30 07:45:53 +0000301 assert(IndentAtLineBreak >= Decoration.size());
Manuel Klimekde008c02013-05-27 15:23:34 +0000302 Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
303 InPPDirective, IndentAtLineBreak - Decoration.size());
304}
305
306void
307BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
308 unsigned InPPDirective,
309 WhitespaceManager &Whitespaces) {
310 if (LineIndex == 0)
311 return;
312 StringRef Prefix = Decoration;
Manuel Klimekc5cc4bf2013-05-28 08:55:01 +0000313 if (Lines[LineIndex].empty()) {
314 if (LineIndex + 1 == Lines.size()) {
315 // If the last line is empty, we don't need a prefix, as the */ will line
316 // up with the decoration (if it exists).
317 Prefix = "";
318 } else if (!Decoration.empty()) {
319 // For other empty lines, if we do have a decoration, adapt it to not
320 // contain a trailing whitespace.
321 Prefix = Prefix.substr(0, 1);
322 }
Daniel Jaspere2c482f2013-05-30 06:40:07 +0000323 } else {
324 if (StartOfLineColumn[LineIndex] == 1) {
325 // This lines starts immediately after the decorating *.
326 Prefix = Prefix.substr(0, 1);
327 }
Manuel Klimekc5cc4bf2013-05-28 08:55:01 +0000328 }
Manuel Klimekde008c02013-05-27 15:23:34 +0000329
330 unsigned WhitespaceOffsetInToken =
331 Lines[LineIndex].data() - Tok.TokenText.data() -
332 LeadingWhitespace[LineIndex];
Manuel Klimekb6dba332013-05-30 07:45:53 +0000333 assert(StartOfLineColumn[LineIndex] >= Prefix.size());
Manuel Klimekde008c02013-05-27 15:23:34 +0000334 Whitespaces.breakToken(
335 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
336 InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
337}
338
339unsigned
340BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
341 unsigned TailOffset) const {
342 // If we break, we always break at the predefined indent.
343 if (TailOffset != 0)
344 return IndentAtLineBreak;
345 return StartOfLineColumn[LineIndex];
346}
347
Alexander Kornienko70ce7882013-04-15 14:28:00 +0000348} // namespace format
349} // namespace clang