clang-format: do not reflow bullet lists

Summary:
This patch prevents reflowing bullet lists in block comments.

It handles all lists supported by doxygen and markdown, e.g. bullet
lists starting with '-', '*', '+', as well as numbered lists starting
with -# or a number followed by a dot.

Reviewers: krasimir

Reviewed By: krasimir

Subscribers: djasper, klimek, cfe-commits

Differential Revision: https://reviews.llvm.org/D33285

llvm-svn: 303556
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index f70597c..3c9df62 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -78,6 +78,14 @@
   }
 
   StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
+
+  // Do not split before a number followed by a dot: this would be interpreted
+  // as a numbered list, which would prevent re-flowing in subsequent passes.
+  static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
+  if (SpaceOffset != StringRef::npos &&
+      kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks)))
+    SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
+
   if (SpaceOffset == StringRef::npos ||
       // Don't break at leading whitespace.
       Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
@@ -299,8 +307,9 @@
 static bool mayReflowContent(StringRef Content) {
   Content = Content.trim(Blanks);
   // Lines starting with '@' commonly have special meaning.
-  static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
-      "@", "TODO", "FIXME", "XXX"};
+  // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
+  static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = {
+      "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " };
   bool hasSpecialMeaningPrefix = false;
   for (StringRef Prefix : kSpecialMeaningPrefixes) {
     if (Content.startswith(Prefix)) {
@@ -308,6 +317,14 @@
       break;
     }
   }
+
+  // Numbered lists may also start with a number followed by '.'
+  // To avoid issues if a line starts with a number which is actually the end
+  // of a previous line, we only consider numbers with up to 2 digits.
+  static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
+  hasSpecialMeaningPrefix = hasSpecialMeaningPrefix ||
+                            kNumberedListRegexp.match(Content);
+
   // Simple heuristic for what to reflow: content should contain at least two
   // characters and either the first or second character must be
   // non-punctuation.