Add a libclang API for comment-to-XML conversion.

The implementation also includes a Relax NG schema and tests for the schema
itself.  The schema is used in c-index-test to verify that XML documents we
produce are valid.  In order to do the validation, we add an optional libxml2
dependency for c-index-test.

Credit for the CMake part goes to Doug Gregor, and credit for the Autoconf part
goes to Eric Christopher.  Thanks!


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@161431 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/tools/c-index-test/CMakeLists.txt b/tools/c-index-test/CMakeLists.txt
index afe49ed..6379194 100644
--- a/tools/c-index-test/CMakeLists.txt
+++ b/tools/c-index-test/CMakeLists.txt
@@ -14,3 +14,10 @@
 set_target_properties(c-index-test
   PROPERTIES
   LINKER_LANGUAGE CXX)
+
+# If libxml2 was found, define CLANG_HAVE_LIBXML and link c-index-test to it.
+if (LIBXML2_FOUND)
+  add_definitions(${LIBXML2_DEFINITIONS} "-DCLANG_HAVE_LIBXML")
+  include_directories(${LIBXML2_INCLUDE_DIR})
+  target_link_libraries(c-index-test ${LIBXML2_LIBRARIES})
+endif()
diff --git a/tools/c-index-test/Makefile b/tools/c-index-test/Makefile
index 932dbb2..25478e1 100644
--- a/tools/c-index-test/Makefile
+++ b/tools/c-index-test/Makefile
@@ -28,3 +28,8 @@
 	   clangBasic.a
 
 include $(CLANG_LEVEL)/Makefile
+
+# libxml2 is optional, so these variables may expand to nothing.  They are left
+# unquoted so that each flag is passed to the tools as a separate argument.
+LIBS += $(LIBXML2_LIBS)
+CPPFLAGS += $(LIBXML2_INC)
diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c
index 4af2548..f9b5cdf 100644
--- a/tools/c-index-test/c-index-test.c
+++ b/tools/c-index-test/c-index-test.c
@@ -2,12 +2,19 @@
 
 #include "clang-c/Index.h"
 #include "clang-c/CXCompilationDatabase.h"
+#include "llvm/Config/config.h"
 #include <ctype.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
 
+#ifdef CLANG_HAVE_LIBXML
+#include <libxml/parser.h>
+#include <libxml/relaxng.h>
+#include <libxml/xmlerror.h>
+#endif
+
 /******************************************************************************/
 /* Utility functions.                                                         */
 /******************************************************************************/
@@ -179,6 +186,19 @@
   return 0;
 }
 
+/* Returns the text following "-comments-xml-schema=" in argv[0], or NULL when
+   argv is empty or argv[0] does not start with that option. */
+static const char *parse_comments_schema(int argc, const char **argv) {
+  static const char Option[] = "-comments-xml-schema=";
+  const size_t OptionLen = sizeof(Option) - 1; /* Excludes the terminator. */
+
+  if (argc != 0 && strncmp(argv[0], Option, OptionLen) == 0)
+    return argv[0] + OptionLen;
+
+  /* No schema option at the front of the argument list. */
+  return NULL;
+}
+
 /******************************************************************************/
 /* Pretty-printing.                                                           */
 /******************************************************************************/
@@ -212,6 +232,10 @@
   clang_disposeString(Str);
 }
 
+static void PrintCXStringWithPrefix(const char *Prefix, CXString Str) {
+  PrintCStringWithPrefix(Prefix, clang_getCString(Str));
+} /* Forwards the C string of Str; Str itself is not disposed here. */
+
 static void PrintCXStringWithPrefixAndDispose(const char *Prefix,
                                               CXString Str) {
   PrintCStringWithPrefix(Prefix, clang_getCString(Str));
@@ -437,7 +461,76 @@
   printf("]");
 }
 
-static void PrintCursorComments(CXCursor Cursor) {
+/* State for validating comment XML against a Relax NG schema. */
+typedef struct {
+  /* Path of the schema file; NULL disables validation. */
+  const char *CommentSchemaFile;
+#ifdef CLANG_HAVE_LIBXML
+  /* Created by ValidateCommentXML on first use; expected to start as NULL. */
+  xmlRelaxNGParserCtxtPtr RNGParser;
+  /* Schema parsed from RNGParser, or NULL if parsing has not succeeded. */
+  xmlRelaxNGPtr Schema;
+#endif
+} CommentXMLValidationData;
+
+/* Validates the XML document in Str against the Relax NG schema named by
+   ValidationData->CommentSchemaFile and prints the outcome to stdout:
+   " CommentXMLValid", " CommentXMLInvalid [...]" or " libXMLError".  The schema
+   is parsed on the first call and cached in ValidationData.  Does nothing when
+   built without CLANG_HAVE_LIBXML, or when ValidationData or its schema file is
+   NULL. */
+static void ValidateCommentXML(const char *Str,
+                               CommentXMLValidationData *ValidationData) {
+#ifdef CLANG_HAVE_LIBXML
+  xmlDocPtr Doc;
+  xmlRelaxNGValidCtxtPtr ValidationCtxt;
+  xmlErrorPtr Error;
+  int status;
+
+  if (!ValidationData || !ValidationData->CommentSchemaFile)
+    return;
+
+  if (!ValidationData->RNGParser) {
+    ValidationData->RNGParser =
+        xmlRelaxNGNewParserCtxt(ValidationData->CommentSchemaFile);
+    ValidationData->Schema = xmlRelaxNGParse(ValidationData->RNGParser);
+  }
+  /* A usable schema is required; checking only RNGParser would miss a schema
+     file that fails to parse. */
+  if (!ValidationData->RNGParser || !ValidationData->Schema) {
+    printf(" libXMLError");
+    return;
+  }
+
+  Doc = xmlParseDoc((const xmlChar *) Str);
+  if (!Doc) {
+    /* xmlGetLastError() may return NULL, so guard the dereference. */
+    Error = xmlGetLastError();
+    printf(" CommentXMLInvalid [not well-formed XML: %s]",
+           (Error && Error->message) ? Error->message : "unknown error");
+    return;
+  }
+
+  ValidationCtxt = xmlRelaxNGNewValidCtxt(ValidationData->Schema);
+  status = xmlRelaxNGValidateDoc(ValidationCtxt, Doc);
+  if (!status)
+    printf(" CommentXMLValid");
+  else if (status > 0) {
+    /* NOTE(review): "vaild" is a typo, kept in case tests match this text. */
+    Error = xmlGetLastError();
+    printf(" CommentXMLInvalid [not vaild XML: %s]",
+           (Error && Error->message) ? Error->message : "unknown error");
+  } else
+    printf(" libXMLError");
+
+  xmlRelaxNGFreeValidCtxt(ValidationCtxt);
+  xmlFreeDoc(Doc);
+#endif
+}
+
+static void PrintCursorComments(CXTranslationUnit TU,
+                                CXCursor Cursor,
+                                CommentXMLValidationData *ValidationData) {
   {
     CXString RawComment;
     const char *RawCommentCString;
@@ -464,12 +541,21 @@
     if (clang_Comment_getKind(Comment) != CXComment_Null) {
       PrintCXStringWithPrefixAndDispose("FullCommentAsHTML",
                                         clang_FullComment_getAsHTML(Comment));
+      {
+        CXString XML;
+        XML = clang_FullComment_getAsXML(TU, Comment);
+        PrintCXStringWithPrefix("FullCommentAsXML", XML);
+        ValidateCommentXML(clang_getCString(XML), ValidationData);
+        clang_disposeString(XML);
+      }
+
       DumpCXComment(Comment);
     }
   }
 }
 
-static void PrintCursor(CXCursor Cursor) {
+static void PrintCursor(CXCursor Cursor,
+                        CommentXMLValidationData *ValidationData) {
   CXTranslationUnit TU = clang_Cursor_getTranslationUnit(Cursor);
   if (clang_isInvalid(Cursor.kind)) {
     CXString ks = clang_getCursorKindSpelling(Cursor.kind);
@@ -674,7 +760,7 @@
         PrintRange(RefNameRange, "RefName");
     }
 
-    PrintCursorComments(Cursor);
+    PrintCursorComments(TU, Cursor, ValidationData);
   }
 }
 
@@ -802,10 +888,11 @@
   PrintRange(extent, "Extent");
 }
 
-/* Data used by all of the visitors. */
-typedef struct  {
+/* Data used by the visitors. */
+typedef struct {
   CXTranslationUnit TU;
   enum CXCursorKind *Filter;
+  CommentXMLValidationData ValidationData; /* Passed on to PrintCursor. */
 } VisitorData;
 
 
@@ -819,7 +906,7 @@
     clang_getSpellingLocation(Loc, 0, &line, &column, 0);
     printf("// %s: %s:%d:%d: ", FileCheckPrefix,
            GetCursorSource(Cursor), line, column);
-    PrintCursor(Cursor);
+    PrintCursor(Cursor, &Data->ValidationData);
     PrintCursorExtent(Cursor);
     printf("\n");
     return CXChildVisit_Recurse;
@@ -872,7 +959,7 @@
       } else if (Ref.kind != CXCursor_FunctionDecl) {
         printf("// %s: %s:%d:%d: ", FileCheckPrefix, GetCursorSource(Ref),
                curLine, curColumn);
-        PrintCursor(Ref);
+        PrintCursor(Ref, &Data->ValidationData);
         printf("\n");
       }
     }
@@ -959,7 +1046,7 @@
   }
 
   if (linkage) {
-    PrintCursor(cursor);
+    PrintCursor(cursor, NULL);
     printf("linkage=%s\n", linkage);
   }
 
@@ -975,7 +1062,7 @@
   if (!clang_isInvalid(clang_getCursorKind(cursor))) {
     CXType T = clang_getCursorType(cursor);
     CXString S = clang_getTypeKindSpelling(T.kind);
-    PrintCursor(cursor);
+    PrintCursor(cursor, NULL);
     printf(" typekind=%s", clang_getCString(S));
     if (clang_isConstQualifiedType(T))
       printf(" const");
@@ -1035,7 +1122,8 @@
 static int perform_test_load(CXIndex Idx, CXTranslationUnit TU,
                              const char *filter, const char *prefix,
                              CXCursorVisitor Visitor,
-                             PostVisitTU PV) {
+                             PostVisitTU PV,
+                             const char *CommentSchemaFile) {
 
   if (prefix)
     FileCheckPrefix = prefix;
@@ -1066,6 +1154,11 @@
 
     Data.TU = TU;
     Data.Filter = ck;
+    Data.ValidationData.CommentSchemaFile = CommentSchemaFile;
+#ifdef CLANG_HAVE_LIBXML
+    Data.ValidationData.RNGParser = NULL;
+    Data.ValidationData.Schema = NULL;
+#endif
     clang_visitChildren(clang_getTranslationUnitCursor(TU), Visitor, &Data);
   }
 
@@ -1097,7 +1190,7 @@
     return 1;
   }
 
-  result = perform_test_load(Idx, TU, filter, prefix, Visitor, PV);
+  result = perform_test_load(Idx, TU, filter, prefix, Visitor, PV, NULL);
   clang_disposeIndex(Idx);
   return result;
 }
@@ -1107,6 +1200,7 @@
                              PostVisitTU PV) {
   CXIndex Idx;
   CXTranslationUnit TU;
+  const char *CommentSchemaFile;
   struct CXUnsavedFile *unsaved_files = 0;
   int num_unsaved_files = 0;
   int result;
@@ -1116,6 +1210,11 @@
                            !strcmp(filter, "local-display"))? 1 : 0,
                           /* displayDiagnosics=*/0);
 
+  if ((CommentSchemaFile = parse_comments_schema(argc, argv))) {
+    argc--;
+    argv++;
+  }
+
   if (parse_remapped_files(argc, argv, 0, &unsaved_files, &num_unsaved_files)) {
     clang_disposeIndex(Idx);
     return -1;
@@ -1133,7 +1232,8 @@
     return 1;
   }
 
-  result = perform_test_load(Idx, TU, filter, NULL, Visitor, PV);
+  result = perform_test_load(Idx, TU, filter, NULL, Visitor, PV,
+                             CommentSchemaFile);
   free_remapped_files(unsaved_files, num_unsaved_files);
   clang_disposeIndex(Idx);
   return result;
@@ -1197,7 +1297,7 @@
       return -1;
   }
   
-  result = perform_test_load(Idx, TU, filter, NULL, Visitor, PV);
+  result = perform_test_load(Idx, TU, filter, NULL, Visitor, PV, NULL);
 
   free_remapped_files(unsaved_files, num_unsaved_files);
   clang_disposeIndex(Idx);
@@ -1217,7 +1317,7 @@
     printf("-%s", prefix);
   PrintExtent(stdout, start_line, start_col, end_line, end_col);
   printf(" ");
-  PrintCursor(cursor);
+  PrintCursor(cursor, NULL);
   printf("\n");
 }
 
@@ -1814,7 +1914,7 @@
         unsigned line, column;
         clang_getSpellingLocation(CursorLoc, 0, &line, &column, 0);
         printf("%d:%d ", line, column);
-        PrintCursor(Cursor);
+        PrintCursor(Cursor, NULL);
         PrintCursorExtent(Cursor);
         Spelling = clang_getCursorSpelling(Cursor);
         cspell = clang_getCString(Spelling);
@@ -1859,7 +1959,7 @@
   if (clang_Range_isNull(range))
     return CXVisit_Continue;
 
-  PrintCursor(cursor);
+  PrintCursor(cursor, NULL);
   PrintRange(range, "");
   printf("\n");
   return CXVisit_Continue;
@@ -1943,7 +2043,7 @@
 
       if (I + 1 == Repeats) {
         CXCursorAndRangeVisitor visitor = { 0, findFileRefsVisit };
-        PrintCursor(Cursor);
+        PrintCursor(Cursor, NULL);
         printf("\n");
         clang_findReferencesInFile(Cursor, file, visitor);
         free(Locations[Loc].filename);
@@ -2141,7 +2241,7 @@
   for (i = 0; i != info->numAttributes; ++i) {
     const CXIdxAttrInfo *Attr = info->attributes[i];
     printf("     <attribute>: ");
-    PrintCursor(Attr->cursor);
+    PrintCursor(Attr->cursor, NULL);
   }
 }
 
@@ -2149,7 +2249,7 @@
                                const CXIdxBaseClassInfo *info) {
   printEntityInfo("     <base>", client_data, info->base);
   printf(" | cursor: ");
-  PrintCursor(info->cursor);
+  PrintCursor(info->cursor, NULL);
   printf(" | loc: ");
   printCXIndexLoc(info->loc, client_data);
 }
@@ -2161,7 +2261,7 @@
     printEntityInfo("     <protocol>", client_data,
                     ProtoInfo->protocols[i]->protocol);
     printf(" | cursor: ");
-    PrintCursor(ProtoInfo->protocols[i]->cursor);
+    PrintCursor(ProtoInfo->protocols[i]->cursor, NULL);
     printf(" | loc: ");
     printCXIndexLoc(ProtoInfo->protocols[i]->loc, client_data);
     printf("\n");
@@ -2251,7 +2351,7 @@
 
   printEntityInfo("[indexDeclaration]", client_data, info->entityInfo);
   printf(" | cursor: ");
-  PrintCursor(info->cursor);
+  PrintCursor(info->cursor, NULL);
   printf(" | loc: ");
   printCXIndexLoc(info->loc, client_data);
   printf(" | semantic-container: ");
@@ -2266,7 +2366,7 @@
   for (i = 0; i != info->numAttributes; ++i) {
     const CXIdxAttrInfo *Attr = info->attributes[i];
     printf("     <attribute>: ");
-    PrintCursor(Attr->cursor);
+    PrintCursor(Attr->cursor, NULL);
     printf("\n");
   }
 
@@ -2289,7 +2389,7 @@
     printEntityInfo("     <ObjCCategoryInfo>: class", client_data,
                     CatInfo->objcClass);
     printf(" | cursor: ");
-    PrintCursor(CatInfo->classCursor);
+    PrintCursor(CatInfo->classCursor, NULL);
     printf(" | loc: ");
     printCXIndexLoc(CatInfo->classLoc, client_data);
     printf("\n");
@@ -2333,7 +2433,7 @@
                                        const CXIdxEntityRefInfo *info) {
   printEntityInfo("[indexEntityReference]", client_data, info->referencedEntity);
   printf(" | cursor: ");
-  PrintCursor(info->cursor);
+  PrintCursor(info->cursor, NULL);
   printf(" | loc: ");
   printCXIndexLoc(info->loc, client_data);
   printEntityInfo(" | <parent>:", client_data, info->parentEntity);
@@ -2602,7 +2702,7 @@
     PrintExtent(stdout, start_line, start_column, end_line, end_column);
     if (!clang_isInvalid(cursors[i].kind)) {
       printf(" ");
-      PrintCursor(cursors[i]);
+      PrintCursor(cursors[i], NULL);
     }
     printf("\n");
   }
@@ -3255,6 +3355,10 @@
 }
 
 int main(int argc, const char **argv) {
+#ifdef CLANG_HAVE_LIBXML
+  LIBXML_TEST_VERSION
+#endif
+
   thread_info client_data;
 
   if (getenv("CINDEXTEST_NOTHREADS"))