Implement more accurate penalty & trade-offs while breaking protruding tokens.

For each line that we break in a protruding token, compute whether the
penalty of breaking is actually larger than the penalty of the excess
characters. Only break if that is the case.

llvm-svn: 318515
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index d9b4094..7fbf01d 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -8002,9 +8002,9 @@
             "    \"f\");",
             format("someFunction1234567890(\"aaabbbcccdddeeefff\");",
                    getLLVMStyleWithColumns(24)));
-  EXPECT_EQ("someFunction(\"aaabbbcc \"\n"
-            "             \"ddde \"\n"
-            "             \"efff\");",
+  EXPECT_EQ("someFunction(\n"
+            "    \"aaabbbcc ddde \"\n"
+            "    \"efff\");",
             format("someFunction(\"aaabbbcc ddde efff\");",
                    getLLVMStyleWithColumns(25)));
   EXPECT_EQ("someFunction(\"aaabbbccc \"\n"
@@ -8023,10 +8023,9 @@
             "  int i;",
             format("#define A string s = \"1234567890\"; int i;",
                    getLLVMStyleWithColumns(20)));
-  // FIXME: Put additional penalties on breaking at non-whitespace locations.
-  EXPECT_EQ("someFunction(\"aaabbbcc \"\n"
-            "             \"dddeeeff\"\n"
-            "             \"f\");",
+  EXPECT_EQ("someFunction(\n"
+            "    \"aaabbbcc \"\n"
+            "    \"dddeeefff\");",
             format("someFunction(\"aaabbbcc dddeeefff\");",
                    getLLVMStyleWithColumns(25)));
 }
@@ -9895,6 +9894,108 @@
   EXPECT_EQ("#pragma option -C -A", format("#pragma    option   -C   -A"));
 }
 
+TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) {
+  FormatStyle Style = getLLVMStyle();
+  Style.ColumnLimit = 20;
+
+  verifyFormat("int a; // the\n"
+               "       // comment", Style);
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second\n"
+            "        * line third\n"
+            "        * line\n"
+            "        */",
+            format("int a; /* first line\n"
+                   "        * second\n"
+                   "        * line third\n"
+                   "        * line\n"
+                   "        */",
+                   Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second\n"
+            "       // line third\n"
+            "       // line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
+
+  Style.PenaltyExcessCharacter = 90;
+  verifyFormat("int a; // the comment", Style);
+  EXPECT_EQ("int a; // the\n"
+            "       // comment aa",
+            format("int a; // the comment aa", Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second line\n"
+            "       // third line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second line\n"
+            "        * third line\n"
+            "        */",
+            format("int a; /* first line\n"
+                   "        * second line\n"
+                   "        * third line\n"
+                   "        */",
+                   Style));
+  // FIXME: Investigate why this is not getting the same layout as the test
+  // above.
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second\n"
+            "        * line third\n"
+            "        * line\n"
+            "        */",
+            format("int a; /* first line second line third line"
+                   "\n*/",
+                   Style));
+
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar\n",
+            format("// foo bar baz bazfoo\n"
+                   "// foo            bar\n",
+                   Style));
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar\n",
+            format("// foo bar baz      bazfoo\n"
+                   "// foo            bar\n",
+                   Style));
+
+  // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the
+  // next one.
+  EXPECT_EQ("// foo bar baz\n"
+            "// bazfoo bar foo\n"
+            "// bar\n",
+            format("// foo bar baz      bazfoo bar\n"
+                   "// foo            bar\n",
+                   Style));
+
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar baz\n"
+            "// bazfoo bar foo\n"
+            "// bar\n",
+            format("// foo bar baz      bazfoo\n"
+                   "// foo bar baz      bazfoo bar\n"
+                   "// foo bar\n",
+                   Style));
+
+  // FIXME: Optimally, we'd keep 'bar' in the last line at the end of the line,
+  // as it does not actually protrude far enough to make breaking pay off.
+  // Unfortunately, due to how reflowing is currently implemented, we already
+  // check the column limit after the reflowing decision and extend the reflow
+  // range, so we will not take whitespace compression into account.
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar baz\n"
+            "// bazfoo bar foo\n"
+            "// bar\n",
+            format("// foo bar baz      bazfoo\n"
+                   "// foo bar baz      bazfoo bar\n"
+                   "// foo           bar\n",
+                   Style));
+}
+
 #define EXPECT_ALL_STYLES_EQUAL(Styles)                                        \
   for (size_t i = 1; i < Styles.size(); ++i)                                   \
   EXPECT_EQ(Styles[0], Styles[i]) << "Style #" << i << " of " << Styles.size() \
diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp
index 5467e73..cde3334 100644
--- a/clang/unittests/Format/FormatTestComments.cpp
+++ b/clang/unittests/Format/FormatTestComments.cpp
@@ -2078,6 +2078,22 @@
                    "       // longsec\n",
                    getLLVMStyleWithColumns(20)));
 
+  // Simple case that correctly handles reflow in parameter lists.
+  EXPECT_EQ("a = f(/* looooooooong\n"
+            "       * long long\n"
+            "       */\n"
+            "      a);",
+            format("a = f(/* looooooooong long\n* long\n*/ a);",
+                   getLLVMStyleWithColumns(22)));
+  // Tricky case that has fewer lines if we reflow the comment, ending up with
+  // fewer lines.
+  EXPECT_EQ("a = f(/* loooooong\n"
+            "       * long long\n"
+            "       */\n"
+            "      a);",
+            format("a = f(/* loooooong long\n* long\n*/ a);",
+                   getLLVMStyleWithColumns(22)));
+
   // Keep empty comment lines.
   EXPECT_EQ("/**/", format(" /**/", getLLVMStyleWithColumns(20)));
   EXPECT_EQ("/* */", format(" /* */", getLLVMStyleWithColumns(20)));
@@ -2426,9 +2442,13 @@
 
 TEST_F(FormatTestComments, BreaksAfterMultilineBlockCommentsInParamLists) {
   EXPECT_EQ("a = f(/* long\n"
-            "         long\n"
-            "       */\n"
+            "         long */\n"
             "      a);",
+            format("a = f(/* long long */ a);", getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("a = f(\n"
+            "    /* long\n"
+            "       long */\n"
+            "    a);",
             format("a = f(/* long long */ a);", getLLVMStyleWithColumns(15)));
 
   EXPECT_EQ("a = f(/* long\n"
@@ -2438,7 +2458,7 @@
             format("a = f(/* long\n"
                    "         long\n"
                    "       */a);",
-                   getLLVMStyleWithColumns(15)));
+                   getLLVMStyleWithColumns(16)));
 
   EXPECT_EQ("a = f(/* long\n"
             "         long\n"
@@ -2447,7 +2467,7 @@
             format("a = f(/* long\n"
                    "         long\n"
                    "       */ a);",
-                   getLLVMStyleWithColumns(15)));
+                   getLLVMStyleWithColumns(16)));
 
   EXPECT_EQ("a = f(/* long\n"
             "         long\n"
@@ -2456,23 +2476,35 @@
             format("a = f(/* long\n"
                    "         long\n"
                    "       */ (1 + 1));",
-                   getLLVMStyleWithColumns(15)));
+                   getLLVMStyleWithColumns(16)));
 
   EXPECT_EQ(
       "a = f(a,\n"
       "      /* long\n"
-      "         long\n"
-      "       */\n"
+      "         long */\n"
       "      b);",
+      format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+  EXPECT_EQ(
+      "a = f(\n"
+      "    a,\n"
+      "    /* long\n"
+      "       long */\n"
+      "    b);",
       format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(15)));
 
-  EXPECT_EQ(
-      "a = f(a,\n"
-      "      /* long\n"
-      "         long\n"
-      "       */\n"
-      "      (1 + 1));",
-      format("a = f(a, /* long long */ (1 + 1));", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("a = f(a,\n"
+            "      /* long\n"
+            "         long */\n"
+            "      (1 + 1));",
+            format("a = f(a, /* long long */ (1 + 1));",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("a = f(\n"
+            "    a,\n"
+            "    /* long\n"
+            "       long */\n"
+            "    (1 + 1));",
+            format("a = f(a, /* long long */ (1 + 1));",
+                   getLLVMStyleWithColumns(15)));
 }
 
 TEST_F(FormatTestComments, IndentLineCommentsInStartOfBlockAtEndOfFile) {