Restructure how we break tokens.

This fixes some bugs in the reflowing logic and splits out the concerns
of reflowing from BreakableToken.

Things to do after this patch:
- Refactor the breakProtrudingToken function possibly into a class, so we
  can split it up into methods that operate on the common state.
- Optimize whitespace compression when reflowing by using the next possible
  split point instead of the latest possible split point.
- Retry different strategies for reflowing (strictly staying below the
  column limit vs. allowing excess characters if possible).

Differential Revision: https://reviews.llvm.org/D40310

llvm-svn: 319314
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index acff8d3..7be9817 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -7732,6 +7732,12 @@
             format("#define A \"some text other\";", AlignLeft));
 }
 
+TEST_F(FormatTest, BreaksStringLiteralsAtColumnLimit) {
+  EXPECT_EQ("C a = \"some more \"\n"
+            "      \"text\";",
+            format("C a = \"some more text\";", getLLVMStyleWithColumns(18)));
+}
+
 TEST_F(FormatTest, FullyRemoveEmptyLines) {
   FormatStyle NoEmptyLines = getLLVMStyleWithColumns(80);
   NoEmptyLines.MaxEmptyLinesToKeep = 0;
@@ -9927,16 +9933,9 @@
 
   Style.PenaltyExcessCharacter = 90;
   verifyFormat("int a; // the comment", Style);
-  EXPECT_EQ("int a; // the\n"
-            "       // comment aa",
+  EXPECT_EQ("int a; // the comment\n"
+            "       // aa",
             format("int a; // the comment aa", Style));
-  EXPECT_EQ("int a; // first line\n"
-            "       // second line\n"
-            "       // third line",
-            format("int a; // first line\n"
-                   "       // second line\n"
-                   "       // third line",
-                   Style));
   EXPECT_EQ("int a; /* first line\n"
             "        * second line\n"
             "        * third line\n"
@@ -9946,12 +9945,18 @@
                    "        * third line\n"
                    "        */",
                    Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second line\n"
+            "       // third line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
   // FIXME: Investigate why this is not getting the same layout as the test
   // above.
   EXPECT_EQ("int a; /* first line\n"
-            "        * second\n"
-            "        * line third\n"
-            "        * line\n"
+            "        * second line\n"
+            "        * third line\n"
             "        */",
             format("int a; /* first line second line third line"
                    "\n*/",
@@ -9970,31 +9975,23 @@
 
   // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the
   // next one.
-  EXPECT_EQ("// foo bar baz\n"
-            "// bazfoo bar foo\n"
-            "// bar\n",
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
             format("// foo bar baz      bazfoo bar\n"
                    "// foo            bar\n",
                    Style));
 
   EXPECT_EQ("// foo bar baz bazfoo\n"
-            "// foo bar baz\n"
-            "// bazfoo bar foo\n"
-            "// bar\n",
+            "// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
             format("// foo bar baz      bazfoo\n"
                    "// foo bar baz      bazfoo bar\n"
                    "// foo bar\n",
                    Style));
 
-  // FIXME: Optimally, we'd keep 'bar' in the last line at the end of the line,
-  // as it does not actually protrude far enough to make breaking pay off.
-  // Unfortunately, due to how reflowing is currently implemented, we already
-  // check the column limit after the reflowing decision and extend the reflow
-  // range, so we will not take whitespace compression into account.
   EXPECT_EQ("// foo bar baz bazfoo\n"
-            "// foo bar baz\n"
-            "// bazfoo bar foo\n"
-            "// bar\n",
+            "// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
             format("// foo bar baz      bazfoo\n"
                    "// foo bar baz      bazfoo bar\n"
                    "// foo           bar\n",
@@ -10595,10 +10592,12 @@
       "\"七 八 九 \"\n"
       "\"十\"",
       format("\"一 二 三 四 五六 七 八 九 十\"", getLLVMStyleWithColumns(11)));
-  EXPECT_EQ("\"一\t二 \"\n"
-            "\"\t三 \"\n"
-            "\"四 五\t六 \"\n"
-            "\"\t七 \"\n"
+  EXPECT_EQ("\"一\t\"\n"
+            "\"二 \t\"\n"
+            "\"三 四 \"\n"
+            "\"五\t\"\n"
+            "\"六 \t\"\n"
+            "\"七 \"\n"
             "\"八九十\tqq\"",
             format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
                    getLLVMStyleWithColumns(11)));
diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp
index cde3334..220ce08 100644
--- a/clang/unittests/Format/FormatTestComments.cpp
+++ b/clang/unittests/Format/FormatTestComments.cpp
@@ -1096,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
             " * comment that\n"
-            " * doesn't\n"
-            " * fit on one line.\n"
-            " */",
+            " * doesn't fit on\n"
+            " * one line.  */",
             format("/* "
                    "This is a long                                         "
                    "comment that "
@@ -2102,6 +2103,85 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+            "// reflows into foo\n",
+            format("// some text that reflows\n"
+                   "// into   foo\n",
+                   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+            "// reflows into   foo\n",
+            format("// some text that reflows\n"
+                   "// into   foo\n",
+                   getLLVMStyleWithColumns(21)));
+
+  // Make sure that we correctly account for the space added in the reflow case
+  // when making the reflowing decision.
+  // First, when the next line ends precisely one column over the limit, do not
+  // reflow.
+  EXPECT_EQ("// some text that\n"
+            "// reflows\n"
+            "// into1234567\n",
+            format("// some text that reflows\n"
+                   "// into1234567\n",
+                   getLLVMStyleWithColumns(21)));
+  // Secondly, when the next line ends later, but the first word in that line
+  // is precisely one column over the limit, do not reflow.
+  EXPECT_EQ("// some text that\n"
+            "// reflows\n"
+            "// into1234567 f\n",
+            format("// some text that reflows\n"
+                   "// into1234567 f\n",
+                   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+            "// that re flows\n",
+            format("// some text that\n"
+                   "// re flows\n",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+            "// that re flows\n",
+            format("// some text that\n"
+                   "// re    flows\n",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+            " * that re flows\n"
+            " */\n",
+            format("/* some text that\n"
+                   "*      re       flows\n"
+                   "*/\n",
+                   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+            "// that\n"
+            "//     re flows\n",
+            format("// some text that\n"
+                   "//     re       flows\n",
+                   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+            "// does re   flow\n"
+            "// more  here\n",
+            format("// some text that does\n"
+                   "// re   flow  more  here\n",
+                   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
             "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2564,7 @@
       "         long */\n"
       "      b);",
       format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
       "a = f(\n"
       "    a,\n"
@@ -2888,7 +2969,7 @@
                    getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
@@ -2896,8 +2977,7 @@
   EXPECT_EQ(
 "/*                                                                */ /*\n"
 "                                                                      *       a\n"
-"                                                                      * b c\n"
-"                                                                      * d*/",
+"                                                                      * b c d*/",
       format(
 "/*                                                                */ /*\n"
 " *       a b\n"