libclang API for comment-to-xml conversion.

The implementation also includes a Relax NG schema and tests for the schema
itself.  The schema is used in c-index-test to verify that XML documents we
produce are valid.  In order to do the validation, we add an optional libxml2
dependency for c-index-test.

Credits for CMake part go to Doug Gregor.  Credits for Autoconf part go to Eric
Christopher.  Thanks!


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@161431 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/tools/libclang/CXComment.cpp b/tools/libclang/CXComment.cpp
index fe6fddb..acb4353 100644
--- a/tools/libclang/CXComment.cpp
+++ b/tools/libclang/CXComment.cpp
@@ -14,9 +14,14 @@
 #include "clang-c/Index.h"
 #include "CXString.h"
 #include "CXComment.h"
+#include "CXCursor.h"
+#include "CXTranslationUnit.h"
 
 #include "clang/AST/CommentVisitor.h"
+#include "clang/AST/Decl.h"
+#include "clang/Frontend/ASTUnit.h"
 
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -826,3 +831,386 @@
 
 } // end extern "C"
 
+namespace {
+class CommentASTToXMLConverter :
+    public ConstCommentVisitor<CommentASTToXMLConverter> {
+public:
+  /// \param Str accumulator for XML.
+  CommentASTToXMLConverter(const SourceManager &SM,
+                           SmallVectorImpl<char> &Str) :
+    SM(SM), Result(Str) { }
+
+  // Inline content.
+  void visitTextComment(const TextComment *C);
+  void visitInlineCommandComment(const InlineCommandComment *C);
+  void visitHTMLStartTagComment(const HTMLStartTagComment *C);
+  void visitHTMLEndTagComment(const HTMLEndTagComment *C);
+
+  // Block content.
+  void visitParagraphComment(const ParagraphComment *C);
+  void visitBlockCommandComment(const BlockCommandComment *C);
+  void visitParamCommandComment(const ParamCommandComment *C);
+  void visitTParamCommandComment(const TParamCommandComment *C);
+  void visitVerbatimBlockComment(const VerbatimBlockComment *C);
+  void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C);
+  void visitVerbatimLineComment(const VerbatimLineComment *C);
+
+  void visitFullComment(const FullComment *C);
+
+  // Helpers.
+  void appendToResultWithXMLEscaping(StringRef S);
+
+private:
+  const SourceManager &SM;
+  /// Output stream for XML.
+  llvm::raw_svector_ostream Result;
+};
+} // end unnamed namespace
+
+void CommentASTToXMLConverter::visitTextComment(const TextComment *C) {
+  appendToResultWithXMLEscaping(C->getText());
+}
+
+void CommentASTToXMLConverter::visitInlineCommandComment(const InlineCommandComment *C) {
+  // Nothing to render if no arguments supplied.
+  if (C->getNumArgs() == 0)
+    return;
+
+  // Nothing to render if argument is empty.
+  StringRef Arg0 = C->getArgText(0);
+  if (Arg0.empty())
+    return;
+
+  switch (C->getRenderKind()) {
+  case InlineCommandComment::RenderNormal:
+    for (unsigned i = 0, e = C->getNumArgs(); i != e; ++i) {
+      appendToResultWithXMLEscaping(C->getArgText(i));
+      Result << " ";
+    }
+    return;
+  case InlineCommandComment::RenderBold:
+    assert(C->getNumArgs() == 1);
+    Result << "<bold>";
+    appendToResultWithXMLEscaping(Arg0);
+    Result << "</bold>";
+    return;
+  case InlineCommandComment::RenderMonospaced:
+    assert(C->getNumArgs() == 1);
+    Result << "<monospaced>";
+    appendToResultWithXMLEscaping(Arg0);
+    Result << "</monospaced>";
+    return;
+  case InlineCommandComment::RenderEmphasized:
+    assert(C->getNumArgs() == 1);
+    Result << "<emphasized>";
+    appendToResultWithXMLEscaping(Arg0);
+    Result << "</emphasized>";
+    return;
+  }
+}
+
+void CommentASTToXMLConverter::visitHTMLStartTagComment(const HTMLStartTagComment *C) {
+  Result << "<rawHTML><![CDATA[";
+  PrintHTMLStartTagComment(C, Result);
+  Result << "]]></rawHTML>";
+}
+
+void CommentASTToXMLConverter::visitHTMLEndTagComment(const HTMLEndTagComment *C) {
+  Result << "<rawHTML>&lt;/" << C->getTagName() << "&gt;</rawHTML>";
+}
+
+void CommentASTToXMLConverter::visitParagraphComment(const ParagraphComment *C) {
+  if (C->isWhitespace())
+    return;
+
+  Result << "<Para>";
+  for (Comment::child_iterator I = C->child_begin(), E = C->child_end();
+       I != E; ++I) {
+    visit(*I);
+  }
+  Result << "</Para>";
+}
+
+void CommentASTToXMLConverter::visitBlockCommandComment(const BlockCommandComment *C) {
+  visit(C->getParagraph());
+}
+
+void CommentASTToXMLConverter::visitParamCommandComment(const ParamCommandComment *C) {
+  Result << "<Parameter><Name>";
+  appendToResultWithXMLEscaping(C->getParamName());
+  Result << "</Name>";
+
+  if (C->isParamIndexValid())
+    Result << "<Index>" << C->getParamIndex() << "</Index>";
+
+  Result << "<Direction isExplicit=\"" << C->isDirectionExplicit() << "\">";
+  switch (C->getDirection()) {
+  case ParamCommandComment::In:
+    Result << "in";
+    break;
+  case ParamCommandComment::Out:
+    Result << "out";
+    break;
+  case ParamCommandComment::InOut:
+    Result << "in,out";
+    break;
+  }
+  Result << "</Direction><Discussion>";
+  visit(C->getParagraph());
+  Result << "</Discussion></Parameter>";
+}
+
+void CommentASTToXMLConverter::visitTParamCommandComment(
+                                  const TParamCommandComment *C) {
+  Result << "<Parameter><Name>";
+  appendToResultWithXMLEscaping(C->getParamName());
+  Result << "</Name>";
+
+  if (C->isPositionValid() && C->getDepth() == 1) {
+    Result << "<Index>" << C->getIndex(0) << "</Index>";
+  }
+
+  Result << "<Discussion>";
+  visit(C->getParagraph());
+  Result << "</Discussion></Parameter>";
+}
+
+void CommentASTToXMLConverter::visitVerbatimBlockComment(
+                                  const VerbatimBlockComment *C) {
+  unsigned NumLines = C->getNumLines();
+  if (NumLines == 0)
+    return;
+
+  Result << llvm::StringSwitch<const char *>(C->getCommandName())
+      .Case("code", "<Verbatim kind=\"code\">")
+      .Default("<Verbatim kind=\"verbatim\">");
+  for (unsigned i = 0; i != NumLines; ++i) {
+    appendToResultWithXMLEscaping(C->getText(i));
+    if (i + 1 != NumLines)
+      Result << '\n';
+  }
+  Result << "</Verbatim>";
+}
+
+void CommentASTToXMLConverter::visitVerbatimBlockLineComment(
+                                  const VerbatimBlockLineComment *C) {
+  llvm_unreachable("should not see this AST node");
+}
+
+void CommentASTToXMLConverter::visitVerbatimLineComment(
+                                  const VerbatimLineComment *C) {
+  Result << "<Verbatim kind=\"verbatim\">";
+  appendToResultWithXMLEscaping(C->getText());
+  Result << "</Verbatim>";
+}
+
+void CommentASTToXMLConverter::visitFullComment(const FullComment *C) {
+  FullCommentParts Parts(C);
+
+  const DeclInfo *DI = C->getDeclInfo();
+  StringRef RootEndTag;
+  if (DI) {
+    switch (DI->getKind()) {
+    case DeclInfo::OtherKind:
+      RootEndTag = "</Other>";
+      Result << "<Other";
+      break;
+    case DeclInfo::FunctionKind:
+      RootEndTag = "</Function>";
+      Result << "<Function";
+      switch (DI->TemplateKind) {
+      case DeclInfo::NotTemplate:
+        break;
+      case DeclInfo::Template:
+        Result << " templateKind=\"template\"";
+        break;
+      case DeclInfo::TemplateSpecialization:
+        Result << " templateKind=\"specialization\"";
+        break;
+      case DeclInfo::TemplatePartialSpecialization:
+        llvm_unreachable("partial specializations of functions "
+                         "are not allowed in C++");
+      }
+      if (DI->IsInstanceMethod)
+        Result << " isInstanceMethod=\"1\"";
+      if (DI->IsClassMethod)
+        Result << " isClassMethod=\"1\"";
+      break;
+    case DeclInfo::ClassKind:
+      RootEndTag = "</Class>";
+      Result << "<Class";
+      switch (DI->TemplateKind) {
+      case DeclInfo::NotTemplate:
+        break;
+      case DeclInfo::Template:
+        Result << " templateKind=\"template\"";
+        break;
+      case DeclInfo::TemplateSpecialization:
+        Result << " templateKind=\"specialization\"";
+        break;
+      case DeclInfo::TemplatePartialSpecialization:
+        Result << " templateKind=\"partialSpecialization\"";
+        break;
+      }
+      break;
+    case DeclInfo::VariableKind:
+      RootEndTag = "</Variable>";
+      Result << "<Variable";
+      break;
+    case DeclInfo::NamespaceKind:
+      RootEndTag = "</Namespace>";
+      Result << "<Namespace";
+      break;
+    case DeclInfo::TypedefKind:
+      RootEndTag = "</Typedef>";
+      Result << "<Typedef";
+      break;
+    }
+
+    {
+      // Print line and column number.
+      SourceLocation Loc = DI->ThisDecl->getLocation();
+      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+      FileID FID = LocInfo.first;
+      unsigned FileOffset = LocInfo.second;
+
+      if (!FID.isInvalid()) {
+        if (const FileEntry *FE = SM.getFileEntryForID(FID)) {
+          Result << " file=\"";
+          appendToResultWithXMLEscaping(FE->getName());
+          Result << "\"";
+        }
+        Result << " line=\"" << SM.getLineNumber(FID, FileOffset)
+               << "\" column=\"" << SM.getColumnNumber(FID, FileOffset)
+               << "\"";
+      }
+    }
+
+    // Finish the root tag.
+    Result << ">";
+
+    bool FoundName = false;
+    if (const NamedDecl *ND = dyn_cast<NamedDecl>(DI->ThisDecl)) {
+      if (DeclarationName DeclName = ND->getDeclName()) {
+        Result << "<Name>";
+        std::string Name = DeclName.getAsString();
+        appendToResultWithXMLEscaping(Name);
+        FoundName = true;
+        Result << "</Name>";
+      }
+    }
+    if (!FoundName)
+      Result << "<Name>&lt;anonymous&gt;</Name>";
+
+    {
+      // Print USR.
+      SmallString<128> USR;
+      cxcursor::getDeclCursorUSR(DI->ThisDecl, USR);
+      if (!USR.empty()) {
+        Result << "<USR>";
+        appendToResultWithXMLEscaping(USR);
+        Result << "</USR>";
+      }
+    }
+  } else {
+    // No DeclInfo -- just emit some root tag and name tag.
+    RootEndTag = "</Other>";
+    Result << "<Other><Name>unknown</Name>";
+  }
+
+  bool FirstParagraphIsBrief = false;
+  if (Parts.Brief) {
+    Result << "<Abstract>";
+    visit(Parts.Brief);
+    Result << "</Abstract>";
+  } else if (Parts.FirstParagraph) {
+    Result << "<Abstract>";
+    visit(Parts.FirstParagraph);
+    Result << "</Abstract>";
+    FirstParagraphIsBrief = true;
+  }
+
+  if (Parts.TParams.size() != 0) {
+    Result << "<TemplateParameters>";
+    for (unsigned i = 0, e = Parts.TParams.size(); i != e; ++i)
+      visit(Parts.TParams[i]);
+    Result << "</TemplateParameters>";
+  }
+
+  if (Parts.Params.size() != 0) {
+    Result << "<Parameters>";
+    for (unsigned i = 0, e = Parts.Params.size(); i != e; ++i)
+      visit(Parts.Params[i]);
+    Result << "</Parameters>";
+  }
+
+  if (Parts.Returns) {
+    Result << "<ResultDiscussion>";
+    visit(Parts.Returns);
+    Result << "</ResultDiscussion>";
+  }
+
+  {
+    bool StartTagEmitted = false;
+    for (unsigned i = 0, e = Parts.MiscBlocks.size(); i != e; ++i) {
+      const Comment *C = Parts.MiscBlocks[i];
+      if (FirstParagraphIsBrief && C == Parts.FirstParagraph)
+        continue;
+      if (!StartTagEmitted) {
+        Result << "<Discussion>";
+        StartTagEmitted = true;
+      }
+      visit(C);
+    }
+    if (StartTagEmitted)
+      Result << "</Discussion>";
+  }
+
+  Result << RootEndTag;
+
+  Result.flush();
+}
+
+void CommentASTToXMLConverter::appendToResultWithXMLEscaping(StringRef S) {
+  for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I) {
+    const char C = *I;
+    switch (C) {
+      case '&':
+        Result << "&amp;";
+        break;
+      case '<':
+        Result << "&lt;";
+        break;
+      case '>':
+        Result << "&gt;";
+        break;
+      case '"':
+        Result << "&quot;";
+        break;
+      case '\'':
+        Result << "&apos;";
+        break;
+      default:
+        Result << C;
+        break;
+    }
+  }
+}
+
+extern "C" {
+
+CXString clang_FullComment_getAsXML(CXTranslationUnit TU, CXComment CXC) {
+  const FullComment *FC = getASTNodeAs<FullComment>(CXC);
+  if (!FC)
+    return createCXString((const char *) 0);
+
+  SourceManager &SM = static_cast<ASTUnit *>(TU->TUData)->getSourceManager();
+
+  SmallString<1024> XML;
+  CommentASTToXMLConverter Converter(SM, XML);
+  Converter.visit(FC);
+  return createCXString(XML.str(), /* DupString = */ true);
+}
+
+} // end extern "C"
+