[clang-format] Support python-style comments in text protos

Summary: This patch adds support for python-style comments in text protos.

Reviewers: djasper

Reviewed By: djasper

Subscribers: bkramer, cfe-commits, klimek

Differential Revision: https://reviews.llvm.org/D39806

llvm-svn: 317886
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index bbf3d88..ad1ec9f 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -40,9 +40,15 @@
   }
 }
 
-static StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///<", "//!<", "///", "//",
-                                              "//!"};
+static StringRef getLineCommentIndentPrefix(StringRef Comment,
+                                            const FormatStyle &Style) {
+  static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",
+                                                    "//!"};
+  static const char *const KnownTextProtoPrefixes[] = {"//", "#"};
+  ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);
+  if (Style.Language == FormatStyle::LK_TextProto)
+    KnownPrefixes = KnownTextProtoPrefixes;
+
   StringRef LongestPrefix;
   for (StringRef KnownPrefix : KnownPrefixes) {
     if (Comment.startswith(KnownPrefix)) {
@@ -732,7 +738,8 @@
        CurrentTok = CurrentTok->Next) {
     LastLineTok = LineTok;
     StringRef TokenText(CurrentTok->TokenText);
-    assert(TokenText.startswith("//"));
+    assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+           "unsupported line comment prefix, '//' and '#' are supported");
     size_t FirstLineIndex = Lines.size();
     TokenText.split(Lines, "\n");
     Content.resize(Lines.size());
@@ -745,8 +752,9 @@
       // We need to trim the blanks in case this is not the first line in a
       // multiline comment. Then the indent is included in Lines[i].
       StringRef IndentPrefix =
-          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
-      assert(IndentPrefix.startswith("//"));
+          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
+      assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+             "unsupported line comment prefix, '//' and '#' are supported");
       OriginalPrefix[i] = Prefix[i] = IndentPrefix;
       if (Lines[i].size() > Prefix[i].size() &&
           isAlphanumeric(Lines[i][Prefix[i].size()])) {
@@ -760,6 +768,9 @@
           Prefix[i] = "///< ";
         else if (Prefix[i] == "//!<")
           Prefix[i] = "//!< ";
+        else if (Prefix[i] == "#" &&
+                 Style.Language == FormatStyle::LK_TextProto)
+          Prefix[i] = "# ";
       }
 
       Tokens[i] = LineTok;
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 727437a..199d297 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -50,6 +50,8 @@
       tryParseJSRegexLiteral();
       handleTemplateStrings();
     }
+    if (Style.Language == FormatStyle::LK_TextProto)
+      tryParsePythonComment();
     tryMergePreviousTokens();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
@@ -330,6 +332,27 @@
   resetLexer(SourceMgr.getFileOffset(loc));
 }
 
+void FormatTokenLexer::tryParsePythonComment() {
+  FormatToken *HashToken = Tokens.back();
+  if (HashToken->isNot(tok::hash))
+    return;
+  // Turn the remainder of this line into a comment.
+  const char *CommentBegin =
+      Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
+  size_t From = CommentBegin - Lex->getBuffer().begin();
+  size_t To = Lex->getBuffer().find_first_of('\n', From);
+  if (To == StringRef::npos)
+    To = Lex->getBuffer().size();
+  size_t Len = To - From;
+  HashToken->Type = TT_LineComment;
+  HashToken->Tok.setKind(tok::comment);
+  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
+  SourceLocation Loc = To < Lex->getBuffer().size()
+                           ? Lex->getSourceLocation(CommentBegin + Len)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(Loc));
+}
+
 bool FormatTokenLexer::tryMerge_TMacro() {
   if (Tokens.size() < 4)
     return false;
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 6605a1c..59dc2a7 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -73,6 +73,8 @@
   // nested template parts by balancing curly braces.
   void handleTemplateStrings();
 
+  void tryParsePythonComment();
+
   bool tryMerge_TMacro();
 
   bool tryMergeConflictMarkers();
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 5d09238..55518d6 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -56,7 +56,7 @@
 };
 
 static bool isLineComment(const FormatToken &FormatTok) {
-  return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//");
+  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
 }
 
 // Checks if \p FormatTok is a line comment that continues the line comment