Restructure how we break tokens.
This fixes some bugs in the reflowing logic and splits out the concerns
of reflowing from BreakableToken.
Things to do after this patch:
- Refactor the breakProtrudingToken function, possibly into a class, so we
can split it up into methods that operate on the common state.
- Optimize whitespace compression when reflowing by using the next possible
split point instead of the latest possible split point.
- Retry different strategies for reflowing (strictly staying below the
column limit vs. allowing excess characters if possible).
Differential Revision: https://reviews.llvm.org/D40310
llvm-svn: 319314
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 44e6ad4..c0d2bf6 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1484,23 +1484,25 @@
LineState &State,
bool AllowBreak,
bool DryRun) {
- std::unique_ptr<BreakableToken> Token =
+ std::unique_ptr<const BreakableToken> Token =
createBreakableToken(Current, State, AllowBreak);
if (!Token)
return 0;
+ assert(Token->getLineCount() > 0);
unsigned ColumnLimit = getColumnLimit(State);
- unsigned StartColumn = State.Column - Current.ColumnWidth;
if (Current.is(TT_LineComment)) {
// We don't insert backslashes when breaking line comments.
ColumnLimit = Style.ColumnLimit;
}
if (Current.UnbreakableTailLength >= ColumnLimit)
return 0;
-
+ // ColumnWidth was already accounted into State.Column before calling
+ // breakProtrudingToken.
+ unsigned StartColumn = State.Column - Current.ColumnWidth;
unsigned NewBreakPenalty = Current.isStringLiteral()
? Style.PenaltyBreakString
: Style.PenaltyBreakComment;
- unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+ // Stores whether we introduce a break anywhere in the token.
bool BreakInserted = Token->introducesBreakBeforeToken();
// Store whether we inserted a new line break at the end of the previous
// logical line.
@@ -1508,145 +1510,295 @@
// We use a conservative reflowing strategy. Reflow starts after a line is
// broken or the corresponding whitespace compressed. Reflow ends as soon as a
// line that doesn't get reflown with the previous line is reached.
- bool ReflowInProgress = false;
- unsigned Penalty = 0;
- unsigned RemainingTokenColumns = 0;
+ bool Reflow = false;
+ // Keep track of where we are in the token:
+ // Where we are in the content of the current logical line.
unsigned TailOffset = 0;
+ // The column number we're currently at.
+ unsigned ContentStartColumn =
+ Token->getContentStartColumn(0, /*Break=*/false);
+ // The number of columns left in the current logical line after TailOffset.
+ unsigned RemainingTokenColumns =
+ Token->getRemainingLength(0, TailOffset, ContentStartColumn);
+ // Adapt the start of the token, for example indent.
+ if (!DryRun)
+ Token->adaptStartOfLine(0, Whitespaces);
+
+ unsigned Penalty = 0;
DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn
<< ".\n");
for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
LineIndex != EndIndex; ++LineIndex) {
- DEBUG(llvm::dbgs() << " Line: " << LineIndex
- << " (Reflow: " << ReflowInProgress << ")\n");
- BreakableToken::Split SplitBefore(StringRef::npos, 0);
- if (ReflowInProgress) {
- SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
- RemainingSpace, CommentPragmasRegex);
- }
- ReflowInProgress = SplitBefore.first != StringRef::npos;
- DEBUG({
- if (ReflowInProgress)
- llvm::dbgs() << " Reflowing.\n";
- });
- TailOffset =
- ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
- // If we found a reflow split and have added a new break before this line,
- // we are going to remove the line break at the start of the next logical
- // line.
- // For example, here we'll add a new line break after 'text', and
- // subsequently delete the line break between 'that' and 'reflows'.
- // // some text that
- // // reflows
- // ->
- // // some text
- // // that reflows
- // When adding the line break, we also added the penalty for it, so we need
- // to subtract that penalty again when we remove the line break due to
- // reflowing.
- if (ReflowInProgress && NewBreakBefore) {
- assert(Penalty >= NewBreakPenalty);
- Penalty -= NewBreakPenalty;
- }
+ DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow
+ << ")\n");
NewBreakBefore = false;
- if (!DryRun)
- Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
- RemainingSpace, SplitBefore, Whitespaces);
- RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
- LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
- while (RemainingTokenColumns > RemainingSpace) {
- DEBUG(llvm::dbgs() << " Over limit, need: " << RemainingTokenColumns
- << ", space: " << RemainingSpace << "\n");
- BreakableToken::Split Split = Token->getSplit(
- LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
+ // If we did reflow the previous line, we'll try reflowing again. Otherwise
+ // we'll start reflowing if the current line is broken or whitespace is
+ // compressed.
+ bool TryReflow = Reflow;
+ // Break the current token until we can fit the rest of the line.
+ while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Over limit, need: "
+ << (ContentStartColumn + RemainingTokenColumns)
+ << ", space: " << ColumnLimit
+ << ", reflown prefix: " << ContentStartColumn
+ << ", offset in line: " << TailOffset << "\n");
+ // If the current token doesn't fit, find the latest possible split in the
+ // current line so that breaking at it will be under the column limit.
+ // FIXME: Use the earliest possible split while reflowing to correctly
+ // compress whitespace within a line.
+ BreakableToken::Split Split =
+ Token->getSplit(LineIndex, TailOffset, ColumnLimit,
+ ContentStartColumn, CommentPragmasRegex);
if (Split.first == StringRef::npos) {
- // The last line's penalty is handled in addNextStateToQueue().
+ // No break opportunity - update the penalty and continue with the next
+ // logical line.
if (LineIndex < EndIndex - 1)
+ // The last line's penalty is handled in addNextStateToQueue().
Penalty += Style.PenaltyExcessCharacter *
- (RemainingTokenColumns - RemainingSpace);
+ (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
DEBUG(llvm::dbgs() << " No break opportunity.\n");
break;
}
assert(Split.first != 0);
- // Check if compressing the whitespace range will bring the line length
- // under the limit. If that is the case, we perform whitespace compression
- // instead of inserting a line break.
- unsigned RemainingTokenColumnsAfterCompression =
- Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
- if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
- RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
- ReflowInProgress = true;
- if (!DryRun)
- Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
- DEBUG(llvm::dbgs() << " Compressing below limit.\n");
- break;
+ if (Token->supportsReflow()) {
+ // Check whether the next natural split point after the current one can
+ // still fit the line, either because we can compress away whitespace,
+ // or because the penalty the excess characters introduce is lower than
+ // the break penalty.
+ // We only do this for tokens that support reflowing, and thus allow us
+ // to change the whitespace arbitrarily (e.g. comments).
+ // Other tokens, like string literals, can be broken on arbitrary
+ // positions.
+
+ // First, compute the columns from TailOffset to the next possible split
+ // position.
+ // For example:
+ // ColumnLimit: |
+ // // Some text that breaks
+ // ^ tail offset
+ // ^-- split
+ // ^-------- to split columns
+ // ^--- next split
+ // ^--------------- to next split columns
+ unsigned ToSplitColumns = Token->getRangeLength(
+ LineIndex, TailOffset, Split.first, ContentStartColumn);
+ DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n");
+
+ BreakableToken::Split NextSplit = Token->getSplit(
+ LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+ ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);
+ // Compute the columns necessary to fit the next non-breakable sequence
+ // into the current line.
+ unsigned ToNextSplitColumns = 0;
+ if (NextSplit.first == StringRef::npos) {
+ ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,
+ ContentStartColumn);
+ } else {
+ ToNextSplitColumns = Token->getRangeLength(
+ LineIndex, TailOffset,
+ Split.first + Split.second + NextSplit.first, ContentStartColumn);
+ }
+ // Compress the whitespace between the break and the start of the next
+ // unbreakable sequence.
+ ToNextSplitColumns =
+ Token->getLengthAfterCompression(ToNextSplitColumns, Split);
+ DEBUG(llvm::dbgs() << " ContentStartColumn: " << ContentStartColumn
+ << "\n");
+ DEBUG(llvm::dbgs() << " ToNextSplit: " << ToNextSplitColumns << "\n");
+ // If the whitespace compression makes us fit, continue on the current
+ // line.
+ bool ContinueOnLine =
+ ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
+ unsigned ExcessCharactersPenalty = 0;
+ if (!ContinueOnLine) {
+ // Similarly, if the excess characters' penalty is lower than the
+ // penalty of introducing a new break, continue on the current line.
+ ExcessCharactersPenalty =
+ (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *
+ Style.PenaltyExcessCharacter;
+ DEBUG(llvm::dbgs()
+ << " Penalty excess: " << ExcessCharactersPenalty
+ << "\n break : " << NewBreakPenalty << "\n");
+ if (ExcessCharactersPenalty < NewBreakPenalty)
+ ContinueOnLine = true;
+ }
+ if (ContinueOnLine) {
+ DEBUG(llvm::dbgs() << " Continuing on line...\n");
+ // The current line fits after compressing the whitespace - reflow
+ // the next line into it if possible.
+ TryReflow = true;
+ if (!DryRun)
+ Token->compressWhitespace(LineIndex, TailOffset, Split,
+ Whitespaces);
+ // When we continue on the same line, leave one space between content.
+ ContentStartColumn += ToSplitColumns + 1;
+ Penalty += ExcessCharactersPenalty;
+ TailOffset += Split.first + Split.second;
+ RemainingTokenColumns = Token->getRemainingLength(
+ LineIndex, TailOffset, ContentStartColumn);
+ continue;
+ }
}
-
- // Compute both the penalties for:
- // - not breaking, and leaving excess characters
- // - adding a new line break
- assert(RemainingTokenColumnsAfterCompression > RemainingSpace);
- unsigned ExcessCharactersPenalty =
- (RemainingTokenColumnsAfterCompression - RemainingSpace) *
- Style.PenaltyExcessCharacter;
-
- unsigned BreakPenalty = NewBreakPenalty;
- unsigned ColumnsUsed =
- Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
- if (ColumnsUsed > ColumnLimit)
- BreakPenalty +=
- Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
-
- DEBUG(llvm::dbgs() << " Penalty excess: " << ExcessCharactersPenalty
- << "\n break : " << BreakPenalty << "\n");
- // Only continue to add the line break if the penalty of the excess
- // characters is larger than the penalty of the line break.
- // FIXME: This does not take into account when we can later remove the
- // line break again due to a reflow.
- if (ExcessCharactersPenalty < BreakPenalty) {
- if (!DryRun)
- Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
- // Do not set ReflowInProgress: we do not have any space left to
- // reflow into.
- Penalty += ExcessCharactersPenalty;
- break;
- }
-
- unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
- LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+ DEBUG(llvm::dbgs() << " Breaking...\n");
+ ContentStartColumn =
+ Token->getContentStartColumn(LineIndex, /*Break=*/true);
+ unsigned NewRemainingTokenColumns = Token->getRemainingLength(
+ LineIndex, TailOffset + Split.first + Split.second,
+ ContentStartColumn);
// When breaking before a tab character, it may be moved by a few columns,
// but will still be expanded to the next tab stop, so we don't save any
// columns.
- if (NewRemainingTokenColumns == RemainingTokenColumns)
+ if (NewRemainingTokenColumns == RemainingTokenColumns) {
// FIXME: Do we need to adjust the penalty?
break;
+ }
assert(NewRemainingTokenColumns < RemainingTokenColumns);
+ DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first
+ << ", " << Split.second << "\n");
if (!DryRun)
Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
- Penalty += BreakPenalty;
+ Penalty += NewBreakPenalty;
TailOffset += Split.first + Split.second;
RemainingTokenColumns = NewRemainingTokenColumns;
- ReflowInProgress = true;
BreakInserted = true;
NewBreakBefore = true;
}
+ // In case there's another line, prepare the state for the start of the next
+ // line.
+ if (LineIndex + 1 != EndIndex) {
+ unsigned NextLineIndex = LineIndex + 1;
+ if (NewBreakBefore)
+ // After breaking a line, try to reflow the next line into the current
+ // one once RemainingTokenColumns fits.
+ TryReflow = true;
+ if (TryReflow) {
+ // We decided that we want to try reflowing the next line into the
+ // current one.
+ // We will now adjust the state as if the reflow is successful (in
+ // preparation for the next line), and see whether that works. If we
+ // decide that we cannot reflow, we will later reset the state to the
+ // start of the next line.
+ Reflow = false;
+ // As we did not continue breaking the line, RemainingTokenColumns is
+ // known to fit after ContentStartColumn. Adapt ContentStartColumn to
+ // the position at which we want to format the next line if we do
+ // actually reflow.
+ // When we reflow, we need to add a space between the end of the current
+ // line and the next line's start column.
+ ContentStartColumn += RemainingTokenColumns + 1;
+ // Get the split that we need to reflow next logical line into the end
+ // of the current one; the split will include any leading whitespace of
+ // the next logical line.
+ BreakableToken::Split SplitBeforeNext =
+ Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);
+ DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn
+ << "\n Potential reflow split: ");
+ if (SplitBeforeNext.first != StringRef::npos) {
+ DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "
+ << SplitBeforeNext.second << "\n");
+ TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;
+ // If the rest of the next line fits into the current line below the
+ // column limit, we can safely reflow.
+ RemainingTokenColumns = Token->getRemainingLength(
+ NextLineIndex, TailOffset, ContentStartColumn);
+ Reflow = true;
+ if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Over limit after reflow, need: "
+ << (ContentStartColumn + RemainingTokenColumns)
+ << ", space: " << ColumnLimit
+ << ", reflown prefix: " << ContentStartColumn
+ << ", offset in line: " << TailOffset << "\n");
+ // If the whole next line does not fit, try to find a point in
+ // the next line at which we can break so that attaching the part
+ // of the next line to that break point onto the current line is
+ // below the column limit.
+ BreakableToken::Split Split =
+ Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
+ ContentStartColumn, CommentPragmasRegex);
+ if (Split.first == StringRef::npos) {
+ DEBUG(llvm::dbgs() << " Did not find later break\n");
+ Reflow = false;
+ } else {
+ // Check whether the first split point gets us below the column
+ // limit. Note that we will execute this split below as part of
+ // the normal token breaking and reflow logic within the line.
+ unsigned ToSplitColumns = Token->getRangeLength(
+ NextLineIndex, TailOffset, Split.first, ContentStartColumn);
+ if (ContentStartColumn + ToSplitColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Next split protrudes, need: "
+ << (ContentStartColumn + ToSplitColumns)
+ << ", space: " << ColumnLimit);
+ unsigned ExcessCharactersPenalty =
+ (ContentStartColumn + ToSplitColumns - ColumnLimit) *
+ Style.PenaltyExcessCharacter;
+ if (NewBreakPenalty < ExcessCharactersPenalty) {
+ Reflow = false;
+ }
+ }
+ }
+ }
+ } else {
+ DEBUG(llvm::dbgs() << "not found.\n");
+ }
+ }
+ if (!Reflow) {
+ // If we didn't reflow into the next line, the only space to consider is
+ // the next logical line. Reset our state to match the start of the next
+ // line.
+ TailOffset = 0;
+ ContentStartColumn =
+ Token->getContentStartColumn(NextLineIndex, /*Break=*/false);
+ RemainingTokenColumns = Token->getRemainingLength(
+ NextLineIndex, TailOffset, ContentStartColumn);
+ // Adapt the start of the token, for example indent.
+ if (!DryRun)
+ Token->adaptStartOfLine(NextLineIndex, Whitespaces);
+ } else {
+ // If we found a reflow split and have added a new break before the next
+ // line, we are going to remove the line break at the start of the next
+ // logical line. For example, here we'll add a new line break after
+ // 'text', and subsequently delete the line break between 'that' and
+ // 'reflows'.
+ // // some text that
+ // // reflows
+ // ->
+ // // some text
+ // // that reflows
+ // When adding the line break, we also added the penalty for it, so we
+ // need to subtract that penalty again when we remove the line break due
+ // to reflowing.
+ if (NewBreakBefore) {
+ assert(Penalty >= NewBreakPenalty);
+ Penalty -= NewBreakPenalty;
+ }
+ if (!DryRun)
+ Token->reflow(NextLineIndex, Whitespaces);
+ }
+ }
}
BreakableToken::Split SplitAfterLastLine =
- Token->getSplitAfterLastLine(TailOffset, ColumnLimit);
+ Token->getSplitAfterLastLine(TailOffset);
if (SplitAfterLastLine.first != StringRef::npos) {
DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");
if (!DryRun)
Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
Whitespaces);
- RemainingTokenColumns = Token->getLineLengthAfterSplitAfterLastLine(
- TailOffset, SplitAfterLastLine);
+ ContentStartColumn =
+ Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);
+ RemainingTokenColumns = Token->getRemainingLength(
+ Token->getLineCount() - 1,
+ TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,
+ ContentStartColumn);
}
- State.Column = RemainingTokenColumns;
+ State.Column = ContentStartColumn + RemainingTokenColumns -
+ Current.UnbreakableTailLength;
if (BreakInserted) {
// If we break the token inside a parameter list, we need to break before