Makes whitespace management more consistent. Instead of selectively storing some changes and directly generating replacements for others, we now notify the WhitespaceManager of the whitespace before every token (and optionally of further changes inside tokens). Then, at the very end, we run over all whitespace in original source order, where we have all the information available to align comments and escaped newlines correctly. The future direction is to pull more of the comment alignment implementation that is now in the BreakableToken into the WhitespaceManager. This fixes a bug when aligning comments or escaped newlines in unwrapped lines that are handled out of order: #define A \ f({ \ g(); \ }); ... now gets laid out correctly. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@182467 91177308-0d34-0410-b5e6-96231b3b80d8

commit: e573c3f7fc40e813559ab4ff1e7eec4f66f1a50f [log] [tgz]
author: Manuel Klimek <klimek@google.com> Wed May 22 12:51:29 2013 +0000
committer: Manuel Klimek <klimek@google.com> Wed May 22 12:51:29 2013 +0000
tree: a5fcc07cb4cd9aeebc47bf84d36f977423e32479
parent: 6e6efa7ec880559b7dd3096423b2345c0ff21c4c [diff] [blame]
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 1f36f9a..6e81e44 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp

@@ -242,10 +242,7 @@
         Whitespaces(Whitespaces), Count(0) {}
 
   /// \brief Formats an \c UnwrappedLine.
-  ///
-  /// \returns The column after the last token in the last line of the
-  /// \c UnwrappedLine.
-  unsigned format(const AnnotatedLine *NextLine) {
+  void format(const AnnotatedLine *NextLine) {
     // Initialize state dependent on indent.
     LineState State;
     State.Column = FirstIndent;
@@ -271,7 +268,6 @@
       while (State.NextToken != NULL) {
         addTokenToState(false, false, State);
       }
-      return State.Column;
     }
 
     // If the ObjC method declaration does not fit on a line, we should format
@@ -280,7 +276,7 @@
       State.Stack.back().BreakBeforeParameter = true;
 
     // Find best solution in solution space.
-    return analyzeSolutionSpace(State);
+    analyzeSolutionSpace(State);
   }
 
 private:
@@ -483,7 +479,6 @@
     unsigned ContinuationIndent =
         std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
     if (Newline) {
-      unsigned WhitespaceStartColumn = State.Column;
       if (Current.is(tok::r_brace)) {
         State.Column = Line.Level * Style.IndentWidth;
       } else if (Current.is(tok::string_literal) &&
@@ -545,12 +540,8 @@
           NewLines =
               std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
                                           Style.MaxEmptyLinesToKeep + 1));
-        if (!Line.InPPDirective)
-          Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
-                                        WhitespaceStartColumn);
-        else
-          Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
-                                          WhitespaceStartColumn);
+        Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
+                                      State.Column, Line.InPPDirective);
       }
 
       State.Stack.back().LastSpace = State.Column;
@@ -603,7 +594,8 @@
       unsigned Spaces = State.NextToken->SpacesRequiredBefore;
 
       if (!DryRun)
-        Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column);
+        Whitespaces.replaceWhitespace(Current, 0, Spaces,
+                                      State.Column + Spaces);
 
       if (Current.Type == TT_ObjCSelectorName &&
           State.Stack.back().ColonPos == 0) {
@@ -786,11 +778,10 @@
   /// already handled in \c addNextStateToQueue; the returned penalty will only
   /// cover the cost of the additional line breaks.
   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
-                                bool DryRun,
-                                unsigned UnbreakableTailLength = 0) {
+                                bool DryRun) {
+    unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
     llvm::OwningPtr<BreakableToken> Token;
-    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength -
-                           UnbreakableTailLength;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
     if (Current.is(tok::string_literal) &&
         Current.Type != TT_ImplicitStringLiteral) {
       // Only break up default narrow strings.
@@ -812,15 +803,7 @@
                 Current.Parent->Type != TT_ImplicitStringLiteral)) {
       Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
     } else {
-      // If a token that we cannot breaks protrudes, it means we were unable to
-      // break a sequence of tokens due to disallowed breaks between the tokens.
-      // Thus, we recursively search backwards to try to find a breakable token.
-      if (State.Column <= getColumnLimit() ||
-          Current.CanBreakBefore || !Current.Parent)
-        return 0;
-      return breakProtrudingToken(
-          *Current.Parent, State, DryRun,
-          UnbreakableTailLength + Current.FormatTok.TokenLength);
+      return 0;
     }
     if (UnbreakableTailLength >= getColumnLimit())
       return 0;
@@ -836,7 +819,7 @@
           Token->getLineLengthAfterSplit(LineIndex, TailOffset);
       while (RemainingTokenLength > RemainingSpace) {
         BreakableToken::Split Split =
-            Token->getSplit(LineIndex, TailOffset, RemainingSpace);
+            Token->getSplit(LineIndex, TailOffset, getColumnLimit());
         if (Split.first == StringRef::npos)
           break;
         assert(Split.first != 0);
@@ -860,7 +843,7 @@
     }
 
     if (BreakInserted) {
-      State.Column = PositionAfterLastLineInToken + UnbreakableTailLength;
+      State.Column = PositionAfterLastLineInToken;
       for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
         State.Stack[i].BreakBeforeParameter = true;
       State.Stack.back().LastSpace = StartColumn;
@@ -904,7 +887,7 @@
   /// the solution space (\c LineStates are the nodes). The algorithm tries to
   /// find the shortest path (the one with lowest penalty) from \p InitialState
   /// to a state where all tokens are placed.
-  unsigned analyzeSolutionSpace(LineState &InitialState) {
+  void analyzeSolutionSpace(LineState &InitialState) {
     std::set<LineState> Seen;
 
     // Insert start element into queue.
@@ -939,15 +922,12 @@
     if (Queue.empty())
       // We were unable to find a solution, do nothing.
       // FIXME: Add diagnostic?
-      return 0;
+      return;
 
     // Reconstruct the solution.
     reconstructPath(InitialState, Queue.top().second);
     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
     DEBUG(llvm::dbgs() << "---\n");
-
-    // Return the column after the last token of the solution.
-    return Queue.top().second->State.Column;
   }
 
   void reconstructPath(LineState &State, StateNode *Current) {
@@ -1191,7 +1171,6 @@
     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
     UnwrappedLineParser Parser(Style, Tokens, *this);
     bool StructuralError = Parser.parse();
-    unsigned PreviousEndOfLineColumn = 0;
     TokenAnnotator Annotator(Style, SourceMgr, Lex,
                              Tokens.getIdentTable().get("in"));
     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
@@ -1247,7 +1226,7 @@
         if (PreviousLineWasTouched) {
           unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u);
           Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
-                                        /*WhitespaceStartColumn*/ 0);
+                                        /*TargetColumn*/ 0);
         }
       } else if (TheLine.Type != LT_Invalid &&
                  (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
@@ -1257,46 +1236,49 @@
             // we break apart a line consisting of multiple unwrapped lines.
             (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
           formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
-                           TheLine.InPPDirective, PreviousEndOfLineColumn);
+                           TheLine.InPPDirective);
         } else {
           Indent = LevelIndent =
               SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
         }
         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
                                          TheLine.First, Whitespaces);
-        PreviousEndOfLineColumn =
-            Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
+        Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
         IndentForLevel[TheLine.Level] = LevelIndent;
         PreviousLineWasTouched = true;
       } else {
-        if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) {
-          unsigned LevelIndent =
-              SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
-          // Remove trailing whitespace of the previous line if it was touched.
-          if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
-            formatFirstToken(TheLine.First, PreviousLineLastToken, LevelIndent,
-                             TheLine.InPPDirective, PreviousEndOfLineColumn);
+        // Format the first token if necessary, and notify the WhitespaceManager
+        // about the unchanged whitespace.
+        for (const AnnotatedToken *Tok = &TheLine.First; Tok != NULL;
+             Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]) {
+          if (Tok == &TheLine.First &&
+              (Tok->FormatTok.NewlinesBefore > 0 || Tok->FormatTok.IsFirst)) {
+            unsigned LevelIndent = SourceMgr.getSpellingColumnNumber(
+                Tok->FormatTok.Tok.getLocation()) -
+                                   1;
+            // Remove trailing whitespace of the previous line if it was
+            // touched.
+            if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
+              formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent,
+                               TheLine.InPPDirective);
+            } else {
+              Whitespaces.addUntouchableToken(Tok->FormatTok,
+                                              TheLine.InPPDirective);
+            }
 
-          if (static_cast<int>(LevelIndent) - Offset >= 0)
-            LevelIndent -= Offset;
-          if (TheLine.First.isNot(tok::comment))
-            IndentForLevel[TheLine.Level] = LevelIndent;
+            if (static_cast<int>(LevelIndent) - Offset >= 0)
+              LevelIndent -= Offset;
+            if (Tok->isNot(tok::comment))
+              IndentForLevel[TheLine.Level] = LevelIndent;
+          } else {
+            Whitespaces.addUntouchableToken(Tok->FormatTok,
+                                            TheLine.InPPDirective);
+          }
         }
         // If we did not reformat this unwrapped line, the column at the end of
         // the last token is unchanged - thus, we can calculate the end of the
         // last token.
-        SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation();
-        PreviousEndOfLineColumn =
-            SourceMgr.getSpellingColumnNumber(LastLoc) +
-            Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1;
         PreviousLineWasTouched = false;
-        if (TheLine.Last->is(tok::comment))
-          Whitespaces.addUntouchableComment(
-              SourceMgr.getSpellingColumnNumber(
-                  TheLine.Last->FormatTok.Tok.getLocation()) -
-              1);
-        else
-          Whitespaces.alignComments();
       }
       PreviousLineLastToken = I->Last;
     }
@@ -1556,7 +1538,7 @@
   /// Returns the indent level of the \c UnwrappedLine.
   void formatFirstToken(const AnnotatedToken &RootToken,
                         const AnnotatedToken *PreviousToken, unsigned Indent,
-                        bool InPPDirective, unsigned PreviousEndOfLineColumn) {
+                        bool InPPDirective) {
     const FormatToken &Tok = RootToken.FormatTok;
 
     unsigned Newlines =
@@ -1564,17 +1546,13 @@
     if (Newlines == 0 && !Tok.IsFirst)
       Newlines = 1;
 
-    if (!InPPDirective || Tok.HasUnescapedNewline) {
-      // Insert extra new line before access specifiers.
-      if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
-          RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1)
-        ++Newlines;
+    // Insert extra new line before access specifiers.
+    if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
+        RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1)
+      ++Newlines;
 
-      Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0);
-    } else {
-      Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
-                                      PreviousEndOfLineColumn);
-    }
+    Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, Indent,
+                                  InPPDirective && !Tok.HasUnescapedNewline);
   }
 
   FormatStyle Style;
commit	e573c3f7fc40e813559ab4ff1e7eec4f66f1a50f	[log] [tgz]
author	Manuel Klimek <klimek@google.com>	Wed May 22 12:51:29 2013 +0000
committer	Manuel Klimek <klimek@google.com>	Wed May 22 12:51:29 2013 +0000
tree	a5fcc07cb4cd9aeebc47bf84d36f977423e32479
parent	6e6efa7ec880559b7dd3096423b2345c0ff21c4c [diff] [blame]