Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 1 | //===--- BreakableToken.cpp - Format C++ code -----------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// \brief Contains implementation of BreakableToken class and classes derived |
| 12 | /// from it. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 16 | #define DEBUG_TYPE "format-token-breaker" |
| 17 | |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 18 | #include "BreakableToken.h" |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 19 | #include "clang/Format/Format.h" |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 20 | #include "llvm/ADT/STLExtras.h" |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 21 | #include "llvm/Support/Debug.h" |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 22 | #include <algorithm> |
| 23 | |
| 24 | namespace clang { |
| 25 | namespace format { |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 26 | namespace { |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 27 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 28 | // FIXME: Move helper string functions to where it makes sense. |
| 29 | |
| 30 | unsigned getOctalLength(StringRef Text) { |
| 31 | unsigned I = 1; |
| 32 | while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { |
| 33 | ++I; |
| 34 | } |
| 35 | return I; |
| 36 | } |
| 37 | |
| 38 | unsigned getHexLength(StringRef Text) { |
| 39 | unsigned I = 2; // Point after '\x'. |
| 40 | while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || |
| 41 | (Text[I] >= 'a' && Text[I] <= 'f') || |
| 42 | (Text[I] >= 'A' && Text[I] <= 'F'))) { |
| 43 | ++I; |
| 44 | } |
| 45 | return I; |
| 46 | } |
| 47 | |
| 48 | unsigned getEscapeSequenceLength(StringRef Text) { |
| 49 | assert(Text[0] == '\\'); |
| 50 | if (Text.size() < 2) |
| 51 | return 1; |
| 52 | |
| 53 | switch (Text[1]) { |
| 54 | case 'u': |
| 55 | return 6; |
| 56 | case 'U': |
| 57 | return 10; |
| 58 | case 'x': |
| 59 | return getHexLength(Text); |
| 60 | default: |
| 61 | if (Text[1] >= '0' && Text[1] <= '7') |
| 62 | return getOctalLength(Text); |
| 63 | return 2; |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | StringRef::size_type getStartOfCharacter(StringRef Text, |
| 68 | StringRef::size_type Offset) { |
| 69 | StringRef::size_type NextEscape = Text.find('\\'); |
| 70 | while (NextEscape != StringRef::npos && NextEscape < Offset) { |
| 71 | StringRef::size_type SequenceLength = |
| 72 | getEscapeSequenceLength(Text.substr(NextEscape)); |
| 73 | if (Offset < NextEscape + SequenceLength) |
| 74 | return NextEscape; |
| 75 | NextEscape = Text.find('\\', NextEscape + SequenceLength); |
| 76 | } |
| 77 | return Offset; |
| 78 | } |
| 79 | |
| 80 | BreakableToken::Split getCommentSplit(StringRef Text, |
| 81 | unsigned ContentStartColumn, |
| 82 | unsigned ColumnLimit) { |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 83 | if (ColumnLimit <= ContentStartColumn + 1) |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 84 | return BreakableToken::Split(StringRef::npos, 0); |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 85 | |
| 86 | unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; |
| 87 | StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); |
| 88 | if (SpaceOffset == StringRef::npos || |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 89 | // Don't break at leading whitespace. |
Manuel Klimek | be9ed77 | 2013-05-29 22:06:18 +0000 | [diff] [blame^] | 90 | Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) { |
| 91 | // Make sure that we don't break at leading whitespace that |
| 92 | // reaches past MaxSplit. |
| 93 | StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(" "); |
| 94 | if (FirstNonWhitespace == StringRef::npos) |
| 95 | // If the comment is only whitespace, we cannot split. |
| 96 | return BreakableToken::Split(StringRef::npos, 0); |
| 97 | SpaceOffset = |
| 98 | Text.find(' ', std::max<unsigned>(MaxSplit, FirstNonWhitespace)); |
| 99 | } |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 100 | if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { |
| 101 | StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(); |
| 102 | StringRef AfterCut = Text.substr(SpaceOffset).ltrim(); |
| 103 | return BreakableToken::Split(BeforeCut.size(), |
| 104 | AfterCut.begin() - BeforeCut.end()); |
| 105 | } |
| 106 | return BreakableToken::Split(StringRef::npos, 0); |
| 107 | } |
| 108 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 109 | BreakableToken::Split getStringSplit(StringRef Text, |
| 110 | unsigned ContentStartColumn, |
| 111 | unsigned ColumnLimit) { |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 112 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 113 | if (ColumnLimit <= ContentStartColumn) |
| 114 | return BreakableToken::Split(StringRef::npos, 0); |
| 115 | unsigned MaxSplit = ColumnLimit - ContentStartColumn; |
| 116 | // FIXME: Reduce unit test case. |
| 117 | if (Text.empty()) |
| 118 | return BreakableToken::Split(StringRef::npos, 0); |
| 119 | MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1); |
| 120 | StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); |
| 121 | if (SpaceOffset != StringRef::npos && SpaceOffset != 0) |
| 122 | return BreakableToken::Split(SpaceOffset + 1, 0); |
| 123 | StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); |
| 124 | if (SlashOffset != StringRef::npos && SlashOffset != 0) |
| 125 | return BreakableToken::Split(SlashOffset + 1, 0); |
| 126 | StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); |
| 127 | if (SplitPoint == StringRef::npos || SplitPoint == 0) |
| 128 | return BreakableToken::Split(StringRef::npos, 0); |
| 129 | return BreakableToken::Split(SplitPoint, 0); |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 130 | } |
| 131 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 132 | } // namespace |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 133 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 134 | unsigned BreakableSingleLineToken::getLineCount() const { return 1; } |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 135 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 136 | unsigned |
| 137 | BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex, |
| 138 | unsigned TailOffset) const { |
| 139 | return StartColumn + Prefix.size() + Postfix.size() + Line.size() - |
| 140 | TailOffset; |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 141 | } |
| 142 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 143 | void BreakableSingleLineToken::insertBreak(unsigned LineIndex, |
| 144 | unsigned TailOffset, Split Split, |
| 145 | bool InPPDirective, |
| 146 | WhitespaceManager &Whitespaces) { |
| 147 | Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first, |
| 148 | Split.second, Postfix, Prefix, InPPDirective, |
| 149 | StartColumn); |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 150 | } |
| 151 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 152 | BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok, |
| 153 | unsigned StartColumn, |
| 154 | StringRef Prefix, |
| 155 | StringRef Postfix) |
| 156 | : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix), |
| 157 | Postfix(Postfix) { |
| 158 | assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); |
| 159 | Line = Tok.TokenText.substr( |
| 160 | Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 161 | } |
| 162 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 163 | BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, |
| 164 | unsigned StartColumn) |
| 165 | : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {} |
| 166 | |
| 167 | BreakableToken::Split |
| 168 | BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, |
| 169 | unsigned ColumnLimit) const { |
| 170 | return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit); |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 171 | } |
| 172 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 173 | static StringRef getLineCommentPrefix(StringRef Comment) { |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 174 | const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 175 | for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) |
Alexander Kornienko | 919398b | 2013-04-17 17:34:05 +0000 | [diff] [blame] | 176 | if (Comment.startswith(KnownPrefixes[i])) |
| 177 | return KnownPrefixes[i]; |
| 178 | return ""; |
| 179 | } |
| 180 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 181 | BreakableLineComment::BreakableLineComment(const FormatToken &Token, |
| 182 | unsigned StartColumn) |
| 183 | : BreakableSingleLineToken(Token, StartColumn, |
| 184 | getLineCommentPrefix(Token.TokenText), "") {} |
| 185 | |
| 186 | BreakableToken::Split |
| 187 | BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, |
| 188 | unsigned ColumnLimit) const { |
| 189 | return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), |
| 190 | ColumnLimit); |
| 191 | } |
| 192 | |
| 193 | BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style, |
| 194 | const FormatToken &Token, |
| 195 | unsigned StartColumn, |
| 196 | unsigned OriginalStartColumn, |
| 197 | bool FirstInLine) |
| 198 | : BreakableToken(Token) { |
| 199 | StringRef TokenText(Token.TokenText); |
| 200 | assert(TokenText.startswith("/*") && TokenText.endswith("*/")); |
| 201 | TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); |
| 202 | |
| 203 | int IndentDelta = StartColumn - OriginalStartColumn; |
| 204 | bool NeedsStar = true; |
| 205 | LeadingWhitespace.resize(Lines.size()); |
| 206 | StartOfLineColumn.resize(Lines.size()); |
| 207 | if (Lines.size() == 1 && !FirstInLine) { |
| 208 | // Comments for which FirstInLine is false can start on arbitrary column, |
| 209 | // and available horizontal space can be too small to align consecutive |
| 210 | // lines with the first one. |
| 211 | // FIXME: We could, probably, align them to current indentation level, but |
| 212 | // now we just wrap them without stars. |
| 213 | NeedsStar = false; |
| 214 | } |
| 215 | StartOfLineColumn[0] = StartColumn + 2; |
| 216 | for (size_t i = 1; i < Lines.size(); ++i) { |
| 217 | adjustWhitespace(Style, i, IndentDelta); |
| 218 | if (Lines[i].empty()) |
| 219 | // If the last line is empty, the closing "*/" will have a star. |
| 220 | NeedsStar = NeedsStar && i + 1 == Lines.size(); |
| 221 | else |
| 222 | NeedsStar = NeedsStar && Lines[i][0] == '*'; |
| 223 | } |
| 224 | Decoration = NeedsStar ? "* " : ""; |
| 225 | IndentAtLineBreak = StartOfLineColumn[0] + 1; |
| 226 | for (size_t i = 1; i < Lines.size(); ++i) { |
| 227 | if (Lines[i].empty()) { |
| 228 | if (!NeedsStar && i + 1 != Lines.size()) |
| 229 | // For all but the last line (which always ends in */), set the |
| 230 | // start column to 0 if they're empty, so we do not insert |
| 231 | // trailing whitespace anywhere. |
| 232 | StartOfLineColumn[i] = 0; |
| 233 | continue; |
| 234 | } |
| 235 | if (NeedsStar) { |
| 236 | // The first line already excludes the star. |
| 237 | // For all other lines, adjust the line to exclude the star and |
| 238 | // (optionally) the first whitespace. |
| 239 | int Offset = Lines[i].startswith("* ") ? 2 : 1; |
| 240 | StartOfLineColumn[i] += Offset; |
| 241 | Lines[i] = Lines[i].substr(Offset); |
| 242 | LeadingWhitespace[i] += Offset; |
| 243 | } |
| 244 | IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); |
| 245 | } |
| 246 | DEBUG({ |
| 247 | for (size_t i = 0; i < Lines.size(); ++i) { |
| 248 | llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] |
| 249 | << "\n"; |
| 250 | } |
| 251 | }); |
| 252 | } |
| 253 | |
| 254 | void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, |
| 255 | unsigned LineIndex, |
| 256 | int IndentDelta) { |
| 257 | // Calculate the end of the non-whitespace text in the previous line. |
| 258 | size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t"); |
| 259 | if (EndOfPreviousLine == StringRef::npos) |
| 260 | EndOfPreviousLine = 0; |
| 261 | else |
| 262 | ++EndOfPreviousLine; |
| 263 | // Calculate the start of the non-whitespace text in the current line. |
| 264 | size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t"); |
| 265 | if (StartOfLine == StringRef::npos) |
| 266 | StartOfLine = Lines[LineIndex].size(); |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 267 | |
| 268 | // Adjust Lines to only contain relevant text. |
| 269 | Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); |
| 270 | Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); |
| 271 | // Adjust LeadingWhitespace to account all whitespace between the lines |
| 272 | // to the current line. |
| 273 | LeadingWhitespace[LineIndex] = |
| 274 | Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); |
Manuel Klimek | d63312b | 2013-05-28 10:01:59 +0000 | [diff] [blame] | 275 | |
| 276 | // FIXME: We currently count tabs as 1 character. To solve this, we need to |
| 277 | // get the correct indentation width of the start of the comment, which |
| 278 | // requires correct counting of the tab expansions before the comment, and |
| 279 | // a configurable tab width. Since the current implementation only breaks |
| 280 | // if leading tabs are intermixed with spaces, that is not a high priority. |
| 281 | |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 282 | // Adjust the start column uniformly accross all lines. |
Manuel Klimek | d63312b | 2013-05-28 10:01:59 +0000 | [diff] [blame] | 283 | StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta); |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 284 | } |
| 285 | |
| 286 | unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } |
| 287 | |
| 288 | unsigned |
| 289 | BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex, |
| 290 | unsigned TailOffset) const { |
| 291 | return getContentStartColumn(LineIndex, TailOffset) + |
| 292 | (Lines[LineIndex].size() - TailOffset) + |
| 293 | // The last line gets a "*/" postfix. |
| 294 | (LineIndex + 1 == Lines.size() ? 2 : 0); |
| 295 | } |
| 296 | |
| 297 | BreakableToken::Split |
| 298 | BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, |
| 299 | unsigned ColumnLimit) const { |
| 300 | return getCommentSplit(Lines[LineIndex].substr(TailOffset), |
| 301 | getContentStartColumn(LineIndex, TailOffset), |
| 302 | ColumnLimit); |
| 303 | } |
| 304 | |
| 305 | void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, |
| 306 | Split Split, bool InPPDirective, |
| 307 | WhitespaceManager &Whitespaces) { |
| 308 | StringRef Text = Lines[LineIndex].substr(TailOffset); |
| 309 | StringRef Prefix = Decoration; |
| 310 | if (LineIndex + 1 == Lines.size() && |
| 311 | Text.size() == Split.first + Split.second) { |
| 312 | // For the last line we need to break before "*/", but not to add "* ". |
| 313 | Prefix = ""; |
| 314 | } |
| 315 | |
| 316 | unsigned BreakOffsetInToken = |
| 317 | Text.data() - Tok.TokenText.data() + Split.first; |
| 318 | unsigned CharsToRemove = Split.second; |
| 319 | Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, |
| 320 | InPPDirective, IndentAtLineBreak - Decoration.size()); |
| 321 | } |
| 322 | |
| 323 | void |
| 324 | BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, |
| 325 | unsigned InPPDirective, |
| 326 | WhitespaceManager &Whitespaces) { |
| 327 | if (LineIndex == 0) |
| 328 | return; |
| 329 | StringRef Prefix = Decoration; |
Manuel Klimek | c5cc4bf | 2013-05-28 08:55:01 +0000 | [diff] [blame] | 330 | if (Lines[LineIndex].empty()) { |
| 331 | if (LineIndex + 1 == Lines.size()) { |
| 332 | // If the last line is empty, we don't need a prefix, as the */ will line |
| 333 | // up with the decoration (if it exists). |
| 334 | Prefix = ""; |
| 335 | } else if (!Decoration.empty()) { |
| 336 | // For other empty lines, if we do have a decoration, adapt it to not |
| 337 | // contain a trailing whitespace. |
| 338 | Prefix = Prefix.substr(0, 1); |
| 339 | } |
| 340 | } |
Manuel Klimek | de008c0 | 2013-05-27 15:23:34 +0000 | [diff] [blame] | 341 | |
| 342 | unsigned WhitespaceOffsetInToken = |
| 343 | Lines[LineIndex].data() - Tok.TokenText.data() - |
| 344 | LeadingWhitespace[LineIndex]; |
| 345 | Whitespaces.breakToken( |
| 346 | Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, |
| 347 | InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size()); |
| 348 | } |
| 349 | |
| 350 | unsigned |
| 351 | BreakableBlockComment::getContentStartColumn(unsigned LineIndex, |
| 352 | unsigned TailOffset) const { |
| 353 | // If we break, we always break at the predefined indent. |
| 354 | if (TailOffset != 0) |
| 355 | return IndentAtLineBreak; |
| 356 | return StartOfLineColumn[LineIndex]; |
| 357 | } |
| 358 | |
Alexander Kornienko | 70ce788 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 359 | } // namespace format |
| 360 | } // namespace clang |