Makes whitespace management more consistent.

Instead of selectively storing some changes and directly generating
replacements for others, we now notify the WhitespaceManager of the
whitespace before every token (and optionally with more changes inside
tokens).

Then, we run over all whitespace in the very end in original source
order, where we have all information available to correctly align
comments and escaped newlines.

The future direction is to pull more of the comment alignment
implementation that is now in the BreakableToken into the
WhitespaceManager.

This fixes a bug when aligning comments or escaped newlines in unwrapped
lines that are handled out of order:
  #define A \
    f({     \
      g();  \
    });
... now gets correctly layouted.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@182467 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 1f36f9a..6e81e44 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -242,10 +242,7 @@
         Whitespaces(Whitespaces), Count(0) {}
 
   /// \brief Formats an \c UnwrappedLine.
-  ///
-  /// \returns The column after the last token in the last line of the
-  /// \c UnwrappedLine.
-  unsigned format(const AnnotatedLine *NextLine) {
+  void format(const AnnotatedLine *NextLine) {
     // Initialize state dependent on indent.
     LineState State;
     State.Column = FirstIndent;
@@ -271,7 +268,6 @@
       while (State.NextToken != NULL) {
         addTokenToState(false, false, State);
       }
-      return State.Column;
     }
 
     // If the ObjC method declaration does not fit on a line, we should format
@@ -280,7 +276,7 @@
       State.Stack.back().BreakBeforeParameter = true;
 
     // Find best solution in solution space.
-    return analyzeSolutionSpace(State);
+    analyzeSolutionSpace(State);
   }
 
 private:
@@ -483,7 +479,6 @@
     unsigned ContinuationIndent =
         std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
     if (Newline) {
-      unsigned WhitespaceStartColumn = State.Column;
       if (Current.is(tok::r_brace)) {
         State.Column = Line.Level * Style.IndentWidth;
       } else if (Current.is(tok::string_literal) &&
@@ -545,12 +540,8 @@
           NewLines =
               std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
                                           Style.MaxEmptyLinesToKeep + 1));
-        if (!Line.InPPDirective)
-          Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
-                                        WhitespaceStartColumn);
-        else
-          Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
-                                          WhitespaceStartColumn);
+        Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
+                                      State.Column, Line.InPPDirective);
       }
 
       State.Stack.back().LastSpace = State.Column;
@@ -603,7 +594,8 @@
       unsigned Spaces = State.NextToken->SpacesRequiredBefore;
 
       if (!DryRun)
-        Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column);
+        Whitespaces.replaceWhitespace(Current, 0, Spaces,
+                                      State.Column + Spaces);
 
       if (Current.Type == TT_ObjCSelectorName &&
           State.Stack.back().ColonPos == 0) {
@@ -786,11 +778,10 @@
   /// already handled in \c addNextStateToQueue; the returned penalty will only
   /// cover the cost of the additional line breaks.
   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
-                                bool DryRun,
-                                unsigned UnbreakableTailLength = 0) {
+                                bool DryRun) {
+    unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
     llvm::OwningPtr<BreakableToken> Token;
-    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength -
-                           UnbreakableTailLength;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
     if (Current.is(tok::string_literal) &&
         Current.Type != TT_ImplicitStringLiteral) {
       // Only break up default narrow strings.
@@ -812,15 +803,7 @@
                 Current.Parent->Type != TT_ImplicitStringLiteral)) {
       Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
     } else {
-      // If a token that we cannot breaks protrudes, it means we were unable to
-      // break a sequence of tokens due to disallowed breaks between the tokens.
-      // Thus, we recursively search backwards to try to find a breakable token.
-      if (State.Column <= getColumnLimit() ||
-          Current.CanBreakBefore || !Current.Parent)
-        return 0;
-      return breakProtrudingToken(
-          *Current.Parent, State, DryRun,
-          UnbreakableTailLength + Current.FormatTok.TokenLength);
+      return 0;
     }
     if (UnbreakableTailLength >= getColumnLimit())
       return 0;
@@ -836,7 +819,7 @@
           Token->getLineLengthAfterSplit(LineIndex, TailOffset);
       while (RemainingTokenLength > RemainingSpace) {
         BreakableToken::Split Split =
-            Token->getSplit(LineIndex, TailOffset, RemainingSpace);
+            Token->getSplit(LineIndex, TailOffset, getColumnLimit());
         if (Split.first == StringRef::npos)
           break;
         assert(Split.first != 0);
@@ -860,7 +843,7 @@
     }
 
     if (BreakInserted) {
-      State.Column = PositionAfterLastLineInToken + UnbreakableTailLength;
+      State.Column = PositionAfterLastLineInToken;
       for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
         State.Stack[i].BreakBeforeParameter = true;
       State.Stack.back().LastSpace = StartColumn;
@@ -904,7 +887,7 @@
   /// the solution space (\c LineStates are the nodes). The algorithm tries to
   /// find the shortest path (the one with lowest penalty) from \p InitialState
   /// to a state where all tokens are placed.
-  unsigned analyzeSolutionSpace(LineState &InitialState) {
+  void analyzeSolutionSpace(LineState &InitialState) {
     std::set<LineState> Seen;
 
     // Insert start element into queue.
@@ -939,15 +922,12 @@
     if (Queue.empty())
       // We were unable to find a solution, do nothing.
       // FIXME: Add diagnostic?
-      return 0;
+      return;
 
     // Reconstruct the solution.
     reconstructPath(InitialState, Queue.top().second);
     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
     DEBUG(llvm::dbgs() << "---\n");
-
-    // Return the column after the last token of the solution.
-    return Queue.top().second->State.Column;
   }
 
   void reconstructPath(LineState &State, StateNode *Current) {
@@ -1191,7 +1171,6 @@
     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
     UnwrappedLineParser Parser(Style, Tokens, *this);
     bool StructuralError = Parser.parse();
-    unsigned PreviousEndOfLineColumn = 0;
     TokenAnnotator Annotator(Style, SourceMgr, Lex,
                              Tokens.getIdentTable().get("in"));
     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
@@ -1247,7 +1226,7 @@
         if (PreviousLineWasTouched) {
           unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u);
           Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
-                                        /*WhitespaceStartColumn*/ 0);
+                                        /*TargetColumn*/ 0);
         }
       } else if (TheLine.Type != LT_Invalid &&
                  (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
@@ -1257,46 +1236,49 @@
             // we break apart a line consisting of multiple unwrapped lines.
             (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
           formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
-                           TheLine.InPPDirective, PreviousEndOfLineColumn);
+                           TheLine.InPPDirective);
         } else {
           Indent = LevelIndent =
               SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
         }
         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
                                          TheLine.First, Whitespaces);
-        PreviousEndOfLineColumn =
-            Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
+        Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
         IndentForLevel[TheLine.Level] = LevelIndent;
         PreviousLineWasTouched = true;
       } else {
-        if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) {
-          unsigned LevelIndent =
-              SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
-          // Remove trailing whitespace of the previous line if it was touched.
-          if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
-            formatFirstToken(TheLine.First, PreviousLineLastToken, LevelIndent,
-                             TheLine.InPPDirective, PreviousEndOfLineColumn);
+        // Format the first token if necessary, and notify the WhitespaceManager
+        // about the unchanged whitespace.
+        for (const AnnotatedToken *Tok = &TheLine.First; Tok != NULL;
+             Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]) {
+          if (Tok == &TheLine.First &&
+              (Tok->FormatTok.NewlinesBefore > 0 || Tok->FormatTok.IsFirst)) {
+            unsigned LevelIndent = SourceMgr.getSpellingColumnNumber(
+                Tok->FormatTok.Tok.getLocation()) -
+                                   1;
+            // Remove trailing whitespace of the previous line if it was
+            // touched.
+            if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
+              formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent,
+                               TheLine.InPPDirective);
+            } else {
+              Whitespaces.addUntouchableToken(Tok->FormatTok,
+                                              TheLine.InPPDirective);
+            }
 
-          if (static_cast<int>(LevelIndent) - Offset >= 0)
-            LevelIndent -= Offset;
-          if (TheLine.First.isNot(tok::comment))
-            IndentForLevel[TheLine.Level] = LevelIndent;
+            if (static_cast<int>(LevelIndent) - Offset >= 0)
+              LevelIndent -= Offset;
+            if (Tok->isNot(tok::comment))
+              IndentForLevel[TheLine.Level] = LevelIndent;
+          } else {
+            Whitespaces.addUntouchableToken(Tok->FormatTok,
+                                            TheLine.InPPDirective);
+          }
         }
         // If we did not reformat this unwrapped line, the column at the end of
         // the last token is unchanged - thus, we can calculate the end of the
         // last token.
-        SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation();
-        PreviousEndOfLineColumn =
-            SourceMgr.getSpellingColumnNumber(LastLoc) +
-            Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1;
         PreviousLineWasTouched = false;
-        if (TheLine.Last->is(tok::comment))
-          Whitespaces.addUntouchableComment(
-              SourceMgr.getSpellingColumnNumber(
-                  TheLine.Last->FormatTok.Tok.getLocation()) -
-              1);
-        else
-          Whitespaces.alignComments();
       }
       PreviousLineLastToken = I->Last;
     }
@@ -1556,7 +1538,7 @@
   /// Returns the indent level of the \c UnwrappedLine.
   void formatFirstToken(const AnnotatedToken &RootToken,
                         const AnnotatedToken *PreviousToken, unsigned Indent,
-                        bool InPPDirective, unsigned PreviousEndOfLineColumn) {
+                        bool InPPDirective) {
     const FormatToken &Tok = RootToken.FormatTok;
 
     unsigned Newlines =
@@ -1564,17 +1546,13 @@
     if (Newlines == 0 && !Tok.IsFirst)
       Newlines = 1;
 
-    if (!InPPDirective || Tok.HasUnescapedNewline) {
-      // Insert extra new line before access specifiers.
-      if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
-          RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1)
-        ++Newlines;
+    // Insert extra new line before access specifiers.
+    if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
+        RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1)
+      ++Newlines;
 
-      Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0);
-    } else {
-      Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
-                                      PreviousEndOfLineColumn);
-    }
+    Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, Indent,
+                                  InPPDirective && !Tok.HasUnescapedNewline);
   }
 
   FormatStyle Style;