Implement AST classes for comments, a real parser for Doxygen comments and a very simple semantic analysis that just builds the AST; minor changes for lexer to pick up source locations I didn't think about before. Comments AST is modelled along the ideas of HTML AST: block and inline content. * Block content is a paragraph or a command that has a paragraph as an argument or verbatim command. * Inline content is placed within some block. Inline content includes plain text, inline commands and HTML as tag soup. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 8d3ba23f2d9e6c87794d059412a0808c9cbacb25 [log] [tgz]
author: Dmitri Gribenko <gribozavr@gmail.com> Fri Jul 06 00:28:32 2012 +0000
committer: Dmitri Gribenko <gribozavr@gmail.com> Fri Jul 06 00:28:32 2012 +0000
tree: c72c618faeffa1c098c4df33857bd12a72c62fb1
parent: 1838703fea568b394407b83d1055b4c7f52fb105 [diff]
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 66096f3..f65f9c0 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp

@@ -13,6 +13,9 @@
 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
+#include "clang/AST/CommentLexer.h"
+#include "clang/AST/CommentSema.h"
+#include "clang/AST/CommentParser.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
@@ -149,18 +152,47 @@
 const RawComment *ASTContext::getRawCommentForDecl(const Decl *D) const {
   // Check whether we have cached a comment string for this declaration
   // already.
-  llvm::DenseMap<const Decl *, const RawComment *>::iterator Pos
+  llvm::DenseMap<const Decl *, RawAndParsedComment>::iterator Pos
       = DeclComments.find(D);
-  if (Pos != DeclComments.end())
-      return Pos->second;
+  if (Pos != DeclComments.end()) {
+    RawAndParsedComment C = Pos->second;
+    return C.first;
+  }
 
   const RawComment *RC = getRawCommentForDeclNoCache(D);
   // If we found a comment, it should be a documentation comment.
   assert(!RC || RC->isDocumentation());
-  DeclComments[D] = RC;
+  DeclComments[D] = RawAndParsedComment(RC, NULL);
   return RC;
 }
 
+/// Returns the parsed documentation comment attached to \p D.
+///
+/// If DeclComments already holds a parsed FullComment for \p D it is returned
+/// directly.  Otherwise the raw comment is taken from the existing cache entry
+/// or from getRawCommentForDecl(), lexed and parsed, and the result is stored
+/// in the DeclComments entry for \p D.
+///
+/// \returns the parsed comment, or NULL when \p D has no raw comment.
+comments::FullComment *ASTContext::getCommentForDecl(const Decl *D) const {
+  const RawComment *Raw;
+  llvm::DenseMap<const Decl *, RawAndParsedComment>::iterator Cached
+      = DeclComments.find(D);
+  if (Cached == DeclComments.end()) {
+    // No entry yet: this lookup also creates the cache entry.
+    Raw = getRawCommentForDecl(D);
+  } else {
+    if (comments::FullComment *Parsed = Cached->second.second)
+      return Parsed;
+    Raw = Cached->second.first;
+  }
+
+  if (!Raw)
+    return NULL;
+
+  const StringRef Text = Raw->getRawText(SourceMgr);
+  comments::Lexer TheLexer(Raw->getSourceRange().getBegin(),
+                           comments::CommentOptions(),
+                           Text.begin(), Text.end());
+
+  comments::Sema Actions(this->BumpAlloc);
+  comments::Parser TheParser(TheLexer, Actions, this->BumpAlloc);
+
+  comments::FullComment *Result = TheParser.parseFullComment();
+  DeclComments[D].second = Result;
+  return Result;
+}
+
 void 
 ASTContext::CanonicalTemplateTemplateParm::Profile(llvm::FoldingSetNodeID &ID, 
                                                TemplateTemplateParmDecl *Parm) {

diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt
index 5dad60c..c45f721 100644
--- a/lib/AST/CMakeLists.txt
+++ b/lib/AST/CMakeLists.txt

@@ -8,8 +8,12 @@
   ASTImporter.cpp
   AttrImpl.cpp
   CXXInheritance.cpp
+  Comment.cpp
   CommentBriefParser.cpp
+  CommentDumper.cpp
   CommentLexer.cpp
+  CommentParser.cpp
+  CommentSema.cpp
   Decl.cpp
   DeclarationName.cpp
   DeclBase.cpp
@@ -60,6 +64,7 @@
   ClangAttrList
   ClangAttrImpl
   ClangDiagnosticAST
+  ClangCommentNodes
   ClangDeclNodes
   ClangStmtNodes
   )

diff --git a/lib/AST/Comment.cpp b/lib/AST/Comment.cpp
new file mode 100644
index 0000000..22277ad
--- /dev/null
+++ b/lib/AST/Comment.cpp

@@ -0,0 +1,90 @@
+//===--- Comment.cpp - Comment AST node implementation --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/Comment.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+namespace comments {
+
+/// Returns the class name of this node as a string (e.g. the text of CLASS
+/// for every COMMENT(CLASS, PARENT) entry in CommentNodes.inc), or
+/// "NoCommentKind" for a node without a kind.
+const char *Comment::getCommentKindName() const {
+  switch (getCommentKind()) {
+  // One case per concrete node class; abstract classes expand to nothing.
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+  case CLASS##Kind:            \
+    return #CLASS;
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+  case NoCommentKind:
+    return "NoCommentKind";
+  }
+  llvm_unreachable("Unknown comment kind!");
+}
+
+// Compile-time check that concrete comment node classes provide their own
+// child_begin()/child_end() instead of inheriting the dispatching versions
+// declared in Comment.
+namespace {
+// Tag types: the result of overload resolution below.
+struct good {};
+struct bad {};
+
+// Matches a pointer to a const member function returning child_iterator that
+// is a member of some class T.
+template <typename T>
+good implements_child_begin_end(Comment::child_iterator (T::*)() const) {
+  return good();
+}
+
+// Non-template overload for members of Comment itself.  When &CLASS::member
+// names the function inherited from Comment, the argument has exactly this
+// type and the non-template overload is preferred over the template.
+static inline bad implements_child_begin_end(
+                      Comment::child_iterator (Comment::*)() const) {
+  return bad();
+}
+
+// The expression is only well-formed when the call yields `good`; there is no
+// conversion from `bad` to `good`.  sizeof keeps the operand unevaluated.
+#define ASSERT_IMPLEMENTS_child_begin(function) \
+  (void) sizeof(good(implements_child_begin_end(function)))
+
+// Instantiates the check for every COMMENT(CLASS, PARENT) entry in
+// CommentNodes.inc.  The function exists only so the checks are compiled.
+static inline void CheckCommentASTNodes() {
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+  ASSERT_IMPLEMENTS_child_begin(&CLASS::child_begin); \
+  ASSERT_IMPLEMENTS_child_begin(&CLASS::child_end);
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+}
+
+#undef ASSERT_IMPLEMENTS_child_begin
+
+} // end unnamed namespace
+
+/// Returns the begin iterator over this node's children by dispatching on the
+/// node kind to the child_begin() of the concrete class.
+Comment::child_iterator Comment::child_begin() const {
+  switch (getCommentKind()) {
+  case NoCommentKind: llvm_unreachable("comment without a kind");
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+  case CLASS##Kind: \
+    return static_cast<const CLASS *>(this)->child_begin();
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+  }
+  // Every case returns; this keeps control from falling off the end of a
+  // non-void function if the kind holds an unexpected value, matching
+  // getCommentKindName().
+  llvm_unreachable("Unknown comment kind!");
+}
+
+/// Returns the end iterator over this node's children by dispatching on the
+/// node kind to the child_end() of the concrete class.
+Comment::child_iterator Comment::child_end() const {
+  switch (getCommentKind()) {
+  case NoCommentKind: llvm_unreachable("comment without a kind");
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+  case CLASS##Kind: \
+    return static_cast<const CLASS *>(this)->child_end();
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+  }
+  // Every case returns; this keeps control from falling off the end of a
+  // non-void function if the kind holds an unexpected value, matching
+  // getCommentKindName().
+  llvm_unreachable("Unknown comment kind!");
+}
+
+
+} // end namespace comments
+} // end namespace clang
+

diff --git a/lib/AST/CommentDumper.cpp b/lib/AST/CommentDumper.cpp
new file mode 100644
index 0000000..fd7a394
--- /dev/null
+++ b/lib/AST/CommentDumper.cpp

@@ -0,0 +1,206 @@
+//===--- CommentDumper.cpp - Dumping implementation for Comment ASTs ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace comments {
+
+namespace {
+/// Visitor that prints a parenthesized, indented textual form of a comment
+/// AST to a stream.  Source locations are printed only when a SourceManager
+/// is supplied.
+class CommentDumper: public comments::ConstCommentVisitor<CommentDumper> {
+  raw_ostream &OS;
+  SourceManager *SM;
+  unsigned IndentLevel;
+
+public:
+  CommentDumper(raw_ostream &OS, SourceManager *SM)
+      : OS(OS), SM(SM), IndentLevel(0) {}
+
+  /// Emits two spaces for every nesting level beyond the first.
+  void dumpIndent() const {
+    for (unsigned Remaining = IndentLevel; Remaining > 1; --Remaining)
+      OS << "  ";
+  }
+
+  /// Prints \p Loc; does nothing without a SourceManager.
+  void dumpLocation(SourceLocation Loc) {
+    if (!SM)
+      return;
+    Loc.print(OS, *SM);
+  }
+
+  void dumpSourceRange(const Comment *C);
+
+  void dumpComment(const Comment *C);
+
+  void dumpSubtree(const Comment *C);
+
+  // Inline content.
+  void visitTextComment(const TextComment *C);
+  void visitInlineCommandComment(const InlineCommandComment *C);
+  void visitHTMLOpenTagComment(const HTMLOpenTagComment *C);
+  void visitHTMLCloseTagComment(const HTMLCloseTagComment *C);
+
+  // Block content.
+  void visitParagraphComment(const ParagraphComment *C);
+  void visitBlockCommandComment(const BlockCommandComment *C);
+  void visitParamCommandComment(const ParamCommandComment *C);
+  void visitVerbatimBlockComment(const VerbatimBlockComment *C);
+  void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C);
+  void visitVerbatimLineComment(const VerbatimLineComment *C);
+
+  void visitFullComment(const FullComment *C);
+};
+
+/// Prints " <begin>" or " <begin, end>" for the source range of \p C.  The end
+/// location is omitted when it equals the begin location.  Prints nothing
+/// when no SourceManager is available.
+void CommentDumper::dumpSourceRange(const Comment *C) {
+  if (SM) {
+    const SourceRange Range = C->getSourceRange();
+    const SourceLocation Begin = Range.getBegin();
+    const SourceLocation End = Range.getEnd();
+
+    OS << " <";
+    dumpLocation(Begin);
+    if (Begin != End) {
+      OS << ", ";
+      dumpLocation(End);
+    }
+    OS << ">";
+  }
+}
+
+/// Prints the common prefix of every node: indentation, an opening
+/// parenthesis, the kind name, the node address and the source range.  The
+/// closing parenthesis is written by dumpSubtree().
+void CommentDumper::dumpComment(const Comment *C) {
+  dumpIndent();
+  OS << "(" << C->getCommentKindName();
+  OS << " " << static_cast<const void *>(C);
+  dumpSourceRange(C);
+}
+
+/// Dumps \p C and, recursively, all of its children one nesting level deeper.
+/// A null node is printed as "<<<NULL>>>".
+void CommentDumper::dumpSubtree(const Comment *C) {
+  ++IndentLevel;
+
+  if (!C) {
+    dumpIndent();
+    OS << "<<<NULL>>>";
+    --IndentLevel;
+    return;
+  }
+
+  visit(C);
+  Comment::child_iterator Child = C->child_begin();
+  const Comment::child_iterator ChildEnd = C->child_end();
+  for (; Child != ChildEnd; ++Child) {
+    // Each child starts on its own line.
+    OS << '\n';
+    dumpSubtree(*Child);
+  }
+  OS << ')';
+
+  --IndentLevel;
+}
+
+/// Dumps a text node followed by its quoted text.
+void CommentDumper::visitTextComment(const TextComment *C) {
+  dumpComment(C);
+
+  OS << " Text=\"";
+  OS << C->getText();
+  OS << "\"";
+}
+
+/// Dumps an inline command node followed by each argument as Arg[i]="text".
+void CommentDumper::visitInlineCommandComment(const InlineCommandComment *C) {
+  dumpComment(C);
+
+  const unsigned NumArgs = C->getArgCount();
+  for (unsigned ArgIdx = 0; ArgIdx != NumArgs; ++ArgIdx) {
+    OS << " Arg[" << ArgIdx << "]=\"";
+    OS << C->getArgText(ArgIdx) << "\"";
+  }
+}
+
+/// Dumps an HTML open tag node: the tag name and, when present, the list of
+/// attributes with their values.
+void CommentDumper::visitHTMLOpenTagComment(const HTMLOpenTagComment *C) {
+  dumpComment(C);
+
+  OS << " Name=\"" << C->getTagName() << "\"";
+
+  const unsigned NumAttrs = C->getAttrCount();
+  if (NumAttrs == 0)
+    return;
+
+  OS << " Attrs: ";
+  for (unsigned AttrIdx = 0; AttrIdx != NumAttrs; ++AttrIdx) {
+    const HTMLOpenTagComment::Attribute &A = C->getAttr(AttrIdx);
+    // NOTE(review): the quote opened before the name is never closed, so the
+    // output reads  "name="value" -- kept as is to preserve the dump format.
+    OS << " \"" << A.Name << "=\"" << A.Value << "\"";
+  }
+}
+
+/// Dumps an HTML close tag node followed by its quoted tag name.
+void CommentDumper::visitHTMLCloseTagComment(const HTMLCloseTagComment *C) {
+  dumpComment(C);
+
+  OS << " Name=\"";
+  OS << C->getTagName();
+  OS << "\"";
+}
+
+/// Dumps a paragraph node; it has no attributes beyond the common prefix.
+void CommentDumper::visitParagraphComment(const ParagraphComment *C) {
+  dumpComment(C);
+}
+
+/// Dumps a block command node followed by its quoted command name.
+void CommentDumper::visitBlockCommandComment(const BlockCommandComment *C) {
+  dumpComment(C);
+
+  OS << " Name=\"";
+  OS << C->getCommandName();
+  OS << "\"";
+}
+
+/// Dumps a parameter command node: the passing direction, whether that
+/// direction was written explicitly, and the parameter name when there is one.
+void CommentDumper::visitParamCommandComment(const ParamCommandComment *C) {
+  dumpComment(C);
+
+  // Stays empty if the direction is none of the handled enumerators.
+  const char *DirectionText = "";
+  switch (C->getDirection()) {
+  case ParamCommandComment::In:
+    DirectionText = " [in]";
+    break;
+  case ParamCommandComment::Out:
+    DirectionText = " [out]";
+    break;
+  case ParamCommandComment::InOut:
+    DirectionText = " [in,out]";
+    break;
+  }
+  OS << DirectionText;
+
+  OS << (C->isDirectionExplicit() ? " explicitly" : " implicitly");
+
+  if (C->hasParamName())
+    OS << " Param=\"" << C->getParamName() << "\"";
+}
+
+/// Dumps a verbatim block node with the names of its opening and closing
+/// commands.
+void CommentDumper::visitVerbatimBlockComment(const VerbatimBlockComment *C) {
+  dumpComment(C);
+
+  OS << " Name=\"" << C->getCommandName() << "\"";
+  OS << " CloseName=\"" << C->getCloseName() << "\"";
+}
+
+/// Dumps one line of a verbatim block followed by its quoted text.
+void CommentDumper::visitVerbatimBlockLineComment(
+    const VerbatimBlockLineComment *C) {
+  dumpComment(C);
+
+  OS << " Text=\"";
+  OS << C->getText();
+  OS << "\"";
+}
+
+/// Dumps a verbatim line node followed by its quoted text.
+void CommentDumper::visitVerbatimLineComment(const VerbatimLineComment *C) {
+  dumpComment(C);
+
+  OS << " Text=\"";
+  OS << C->getText();
+  OS << "\"";
+}
+
+/// Dumps the root node; it has no attributes beyond the common prefix.
+void CommentDumper::visitFullComment(const FullComment *C) {
+  dumpComment(C);
+}
+
+} // unnamed namespace
+
+/// Dumps this subtree to llvm::errs() without source locations, followed by a
+/// newline.
+void Comment::dump() const {
+  raw_ostream &Err = llvm::errs();
+  CommentDumper Dumper(Err, NULL);
+  Dumper.dumpSubtree(this);
+  Err << '\n';
+}
+
+/// Dumps this subtree to llvm::errs(), printing source locations through
+/// \p SM, followed by a newline.
+void Comment::dump(SourceManager &SM) const {
+  raw_ostream &Err = llvm::errs();
+  CommentDumper Dumper(Err, &SM);
+  Dumper.dumpSubtree(this);
+  Err << '\n';
+}
+
+} // end namespace comments
+} // end namespace clang
+

diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp
index c3a801d..77d2a9b 100644
--- a/lib/AST/CommentLexer.cpp
+++ b/lib/AST/CommentLexer.cpp

@@ -122,6 +122,7 @@
 }
 
 namespace {
+/// Returns pointer to the first newline character in the string.
 const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
     const char C = *BufferPtr;
@@ -270,6 +271,9 @@
   case LS_HTMLOpenTag:
     lexHTMLOpenTag(T);
     return;
+  case LS_HTMLCloseTag:
+    lexHTMLCloseTag(T);
+    return;
   }
 
   assert(State == LS_Normal);
@@ -356,7 +360,7 @@
         if (isHTMLIdentifierCharacter(C))
           setupAndLexHTMLOpenTag(T);
         else if (C == '/')
-          lexHTMLCloseTag(T);
+          setupAndLexHTMLCloseTag(T);
         else {
           StringRef Text(BufferPtr, TokenPtr - BufferPtr);
           formTokenWithChars(T, TokenPtr, tok::text);
@@ -404,6 +408,18 @@
   formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
   T.setVerbatimBlockName(Name);
 
+  // If there is a newline following the verbatim opening command, skip the
+  // newline so that we don't create a tok::verbatim_block_line with empty
+  // text content.
+  if (BufferPtr != CommentEnd) {
+    const char C = *BufferPtr;
+    if (C == '\n' || C == '\r') {
+      BufferPtr = skipNewline(BufferPtr, CommentEnd);
+      State = LS_VerbatimBlockBody;
+      return;
+    }
+  }
+
   State = LS_VerbatimBlockFirstLine;
 }
 
@@ -419,9 +435,11 @@
 
   // Look for end command in current line.
   size_t Pos = Line.find(VerbatimBlockEndCommandName);
+  const char *TextEnd;
   const char *NextLine;
   if (Pos == StringRef::npos) {
     // Current line is completely verbatim.
+    TextEnd = Newline;
     NextLine = skipNewline(Newline, CommentEnd);
   } else if (Pos == 0) {
     // Current line contains just an end command.
@@ -433,10 +451,11 @@
     return;
   } else {
     // There is some text, followed by end command.  Extract text first.
-    NextLine = BufferPtr + Pos;
+    TextEnd = BufferPtr + Pos;
+    NextLine = TextEnd;
   }
 
-  StringRef Text(BufferPtr, NextLine - BufferPtr);
+  StringRef Text(BufferPtr, TextEnd - BufferPtr);
   formTokenWithChars(T, NextLine, tok::verbatim_block_line);
   T.setVerbatimBlockText(Text);
 
@@ -542,18 +561,26 @@
   }
 }
 
-void Lexer::lexHTMLCloseTag(Token &T) {
+void Lexer::setupAndLexHTMLCloseTag(Token &T) {
   assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
 
   const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
   const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
 
   const char *End = skipWhitespace(TagNameEnd, CommentEnd);
-  if (End != CommentEnd && *End == '>')
-    End++;
 
   formTokenWithChars(T, End, tok::html_tag_close);
   T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
+
+  if (BufferPtr != CommentEnd && *BufferPtr == '>')
+    State = LS_HTMLCloseTag;
+}
+
+/// Lexes the '>' that terminates an HTML closing tag as a tok::html_greater
+/// token and switches the lexer back to the normal state.  Must only be called
+/// with BufferPtr pointing at '>'.
+void Lexer::lexHTMLCloseTag(Token &T) {
+  assert(BufferPtr != CommentEnd && *BufferPtr == '>');
+
+  const char *AfterGreater = BufferPtr + 1;
+  formTokenWithChars(T, AfterGreater, tok::html_greater);
+  State = LS_Normal;
+}
 
 Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
@@ -595,7 +622,8 @@
         BufferPtr++;
 
       CommentState = LCS_InsideBCPLComment;
-      State = LS_Normal;
+      if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
+        State = LS_Normal;
       CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
       goto again;
     }
@@ -628,7 +656,7 @@
       EndWhitespace++;
 
     // Turn any whitespace between comments (and there is only whitespace
-    // between them) into a newline.  We have two newlines between comments
+    // between them) into a newline.  We have two newlines between C comments
     // in total (first one was synthesized after a comment).
     formTokenWithChars(T, EndWhitespace, tok::newline);
 

diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp
new file mode 100644
index 0000000..701b6fa
--- /dev/null
+++ b/lib/AST/CommentParser.cpp

@@ -0,0 +1,414 @@
+//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentParser.h"
+#include "clang/AST/CommentSema.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+namespace comments {
+
+/// Constructs a parser reading tokens from \p L, reporting to \p S and using
+/// \p Allocator, then calls consumeToken() once so that parsing can start.
+Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator):
+    L(L), S(S), Allocator(Allocator) {
+  consumeToken();
+}
+
+/// Parses the arguments of a parameter command from the retokenized text.
+///
+/// An optional direction specification in square brackets is tried first,
+/// then a single word.  Each argument that is actually present is passed to
+/// Sema::actOnParamCommandArg().
+///
+/// \param PC the parameter command being built.
+/// \param Retokenizer source of the text following the command.
+/// \returns the command as returned by the last Sema callback, or \p PC
+/// unchanged when no argument was found.
+ParamCommandComment *Parser::parseParamCommandArgs(
+    ParamCommandComment *PC,
+    TextTokenRetokenizer &Retokenizer) {
+  Token Arg;
+  // Check if argument looks like direction specification: [dir]
+  // e.g., [in], [out], [in,out]
+  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
+    PC = S.actOnParamCommandArg(PC,
+                                Arg.getLocation(),
+                                Arg.getEndLocation(),
+                                Arg.getText(),
+                                /* IsDirection = */ true);
+
+  // Report a second argument only when a word was actually lexed; otherwise
+  // Arg does not describe one.
+  if (Retokenizer.lexWord(Arg))
+    PC = S.actOnParamCommandArg(PC,
+                                Arg.getLocation(),
+                                Arg.getEndLocation(),
+                                Arg.getText(),
+                                /* IsDirection = */ false);
+
+  return PC;
+}
+
+/// Parses up to \p NumArgs word arguments for the block command \p BC and
+/// hands the ones that were found to Sema::actOnBlockCommandArgs().
+///
+/// Storage for \p NumArgs arguments is taken from the parser's allocator even
+/// if fewer words are available.
+BlockCommandComment *Parser::parseBlockCommandArgs(
+    BlockCommandComment *BC,
+    TextTokenRetokenizer &Retokenizer,
+    unsigned NumArgs) {
+  typedef BlockCommandComment::Argument Argument;
+  Argument *Args = new (Allocator) Argument[NumArgs];
+
+  Token Arg;
+  unsigned ParsedArgs = 0;
+  for (; ParsedArgs < NumArgs && Retokenizer.lexWord(Arg); ++ParsedArgs) {
+    const SourceRange ArgRange(Arg.getLocation(), Arg.getEndLocation());
+    Args[ParsedArgs] = Argument(ArgRange, Arg.getText());
+  }
+
+  return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
+}
+
+/// Parses a block command: the command token, its arguments (if the command
+/// takes any) and the paragraph that forms its body.
+///
+/// Parameter commands are built through the actOnParamCommand* callbacks, all
+/// other block commands through the actOnBlockCommand* callbacks.  If another
+/// block command follows immediately, the current command gets an empty
+/// paragraph because block commands do not nest.
+///
+/// \returns the finished command node.
+BlockCommandComment *Parser::parseBlockCommand() {
+  assert(Tok.is(tok::command));
+
+  // Exactly one of PC / BC is set, depending on IsParam.
+  ParamCommandComment *PC;
+  BlockCommandComment *BC;
+  bool IsParam = false;
+  unsigned NumArgs = 0;
+  if (S.isParamCommand(Tok.getCommandName())) {
+    IsParam = true;
+    PC = S.actOnParamCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandName());
+  } else {
+    NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
+    BC = S.actOnBlockCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandName());
+  }
+  consumeToken();
+
+  if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
+    // Block command ahead.  We can't nest block commands, so pretend that this
+    // command has an empty argument.
+    // TODO: Diag() Warn empty arg to block command
+    ParagraphComment *Paragraph = S.actOnParagraphComment(
+                                ArrayRef<InlineContentComment *>());
+    // Finish whichever node was started above; BC is not set for a parameter
+    // command.
+    if (IsParam)
+      return S.actOnParamCommandFinish(PC, Paragraph);
+    return S.actOnBlockCommandFinish(BC, Paragraph);
+  }
+
+  if (IsParam || NumArgs > 0) {
+    // In order to parse command arguments we need to retokenize a few
+    // following text tokens.
+    TextTokenRetokenizer Retokenizer(Allocator);
+    while (Tok.is(tok::text)) {
+      if (Retokenizer.addToken(Tok))
+        consumeToken();
+    }
+
+    if (IsParam)
+      PC = parseParamCommandArgs(PC, Retokenizer);
+    else
+      BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
+
+    // Put back tokens we didn't use.
+    Token Text;
+    while (Retokenizer.lexText(Text))
+      putBack(Text);
+  }
+
+  BlockContentComment *Block = parseParagraphOrBlockCommand();
+  // Since we have checked for a block command, we should have parsed a
+  // paragraph.
+  if (IsParam)
+    return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
+  else
+    return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
+}
+
+/// Parses an inline command together with at most one word argument taken
+/// from the text tokens that follow it.  Text that was collected but not used
+/// as the argument is put back into the token stream.
+InlineCommandComment *Parser::parseInlineCommand() {
+  assert(Tok.is(tok::command));
+
+  const Token CommandTok = Tok;
+  consumeToken();
+
+  // Collect the following text tokens so they can be re-lexed as words.
+  TextTokenRetokenizer Retokenizer(Allocator);
+  while (Tok.is(tok::text)) {
+    if (Retokenizer.addToken(Tok))
+      consumeToken();
+  }
+
+  Token ArgTok;
+  InlineCommandComment *IC;
+  if (!Retokenizer.lexWord(ArgTok)) {
+    // No argument available.
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandName());
+  } else {
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandName(),
+                              ArgTok.getLocation(),
+                              ArgTok.getEndLocation(),
+                              ArgTok.getText());
+  }
+
+  Token Leftover;
+  while (Retokenizer.lexText(Leftover))
+    putBack(Leftover);
+
+  return IC;
+}
+
+/// Parses an HTML opening tag and its attributes.
+///
+/// Attributes are collected until either the closing '>' is seen or a token
+/// that cannot belong to an open tag appears.  In the latter case the tag is
+/// finished with an invalid (default-constructed) location for '>'.
+HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
+  assert(Tok.is(tok::html_tag_open));
+  HTMLOpenTagComment *HOT =
+      S.actOnHTMLOpenTagStart(Tok.getLocation(),
+                              Tok.getHTMLTagOpenName());
+  consumeToken();
+
+  SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;
+  while (true) {
+    if (Tok.is(tok::html_ident)) {
+      Token Ident = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_equals)) {
+        // Attribute without a value.
+        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
+                                                      Ident.getHTMLIdent()));
+        continue;
+      }
+      Token Equals = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_quoted_string)) {
+        // '=' without a value: record the attribute name only.
+        // TODO: Diag() expected quoted string
+        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
+                                                      Ident.getHTMLIdent()));
+        continue;
+      }
+      // Complete name="value" attribute.
+      Attrs.push_back(HTMLOpenTagComment::Attribute(
+                              Ident.getLocation(),
+                              Ident.getHTMLIdent(),
+                              Equals.getLocation(),
+                              SourceRange(Tok.getLocation(),
+                                          Tok.getEndLocation()),
+                              Tok.getHTMLQuotedString()));
+      consumeToken();
+      continue;
+    } else if (Tok.is(tok::html_greater)) {
+      // Properly terminated tag.
+      HOT = S.actOnHTMLOpenTagFinish(HOT,
+                                     copyArray(llvm::makeArrayRef(Attrs)),
+                                     Tok.getLocation());
+      consumeToken();
+      return HOT;
+    } else if (Tok.is(tok::html_equals) ||
+               Tok.is(tok::html_quoted_string)) {
+      // Stray '=' or quoted string without an attribute name: skip them.
+      // TODO: Diag() Err expected ident
+      while (Tok.is(tok::html_equals) ||
+             Tok.is(tok::html_quoted_string))
+        consumeToken();
+    } else {
+      // Not a token from HTML open tag.  Thus HTML tag prematurely ended.
+      // TODO: Diag() Err HTML tag prematurely ended
+      return S.actOnHTMLOpenTagFinish(HOT,
+                                      copyArray(llvm::makeArrayRef(Attrs)),
+                                      SourceLocation());
+    }
+  }
+}
+
+/// Parses an HTML closing tag.  The location of the terminating '>' is passed
+/// to Sema when that token is present; otherwise an invalid
+/// (default-constructed) location is passed.
+HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
+  assert(Tok.is(tok::html_tag_close));
+
+  Token CloseTagTok = Tok;
+  consumeToken();
+
+  SourceLocation GreaterLoc;
+  if (Tok.is(tok::html_greater)) {
+    GreaterLoc = Tok.getLocation();
+    consumeToken();
+  }
+
+  return S.actOnHTMLCloseTag(CloseTagTok.getLocation(),
+                             GreaterLoc,
+                             CloseTagTok.getHTMLTagCloseName());
+}
+
+/// Parses either a block command (when one is the very first token) or a
+/// paragraph of inline content.
+///
+/// A paragraph ends at block content, at the end of the comment, or at two
+/// consecutive newlines.  Inside the switch, `continue` keeps collecting
+/// inline content and `break` leaves the switch and then the loop, finishing
+/// the paragraph.
+BlockContentComment *Parser::parseParagraphOrBlockCommand() {
+  SmallVector<InlineContentComment *, 8> Content;
+
+  while (true) {
+    switch (Tok.getKind()) {
+    case tok::verbatim_block_begin:
+    case tok::verbatim_line_name:
+    case tok::eof:
+      assert(Content.size() != 0);
+      break; // Block content or EOF ahead, finish this paragraph.
+
+    case tok::command:
+      if (S.isBlockCommand(Tok.getCommandName())) {
+        if (Content.size() == 0)
+          return parseBlockCommand();
+        break; // Block command ahead, finish this paragraph.
+      }
+      if (S.isInlineCommand(Tok.getCommandName())) {
+        Content.push_back(parseInlineCommand());
+        continue;
+      }
+
+      // Not a block command, not an inline command ==> an unknown command.
+      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
+                                              Tok.getEndLocation(),
+                                              Tok.getCommandName()));
+      consumeToken();
+      continue;
+
+    case tok::newline: {
+      consumeToken();
+      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
+        consumeToken();
+        break; // Two newlines -- end of paragraph.
+      }
+      // A single newline is recorded on the preceding inline node.
+      if (Content.size() > 0)
+        Content.back()->addTrailingNewline();
+      continue;
+    }
+
+    // Don't deal with HTML tag soup now.
+    case tok::html_tag_open:
+      Content.push_back(parseHTMLOpenTag());
+      continue;
+
+    case tok::html_tag_close:
+      Content.push_back(parseHTMLCloseTag());
+      continue;
+
+    case tok::text:
+      Content.push_back(S.actOnText(Tok.getLocation(),
+                                    Tok.getEndLocation(),
+                                    Tok.getText()));
+      consumeToken();
+      continue;
+
+    // These tokens are consumed by the verbatim and HTML tag parsers.
+    case tok::verbatim_block_line:
+    case tok::verbatim_block_end:
+    case tok::verbatim_line_text:
+    case tok::html_ident:
+    case tok::html_equals:
+    case tok::html_quoted_string:
+    case tok::html_greater:
+      llvm_unreachable("should not see this token");
+    }
+    break;
+  }
+
+  return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
+}
+
+/// Parses a verbatim block: the opening command, the text lines and the
+/// closing command.  A bare newline inside the block produces a line node with
+/// empty text.  The block is expected to be terminated by a
+/// tok::verbatim_block_end token.
+VerbatimBlockComment *Parser::parseVerbatimBlock() {
+  assert(Tok.is(tok::verbatim_block_begin));
+
+  VerbatimBlockComment *VB =
+      S.actOnVerbatimBlockStart(Tok.getLocation(),
+                                Tok.getVerbatimBlockName());
+  consumeToken();
+
+  // Don't create an empty line if verbatim opening command is followed
+  // by a newline.
+  if (Tok.is(tok::newline))
+    consumeToken();
+
+  SmallVector<VerbatimBlockLineComment *, 8> Lines;
+  while (Tok.is(tok::verbatim_block_line) || Tok.is(tok::newline)) {
+    VerbatimBlockLineComment *Line;
+    if (Tok.is(tok::newline)) {
+      // Empty line, just a tok::newline.
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
+                                      "");
+      consumeToken();
+    } else {
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
+                                      Tok.getVerbatimBlockText());
+      consumeToken();
+      // The newline that ends a text line is not a line of its own.
+      if (Tok.is(tok::newline))
+        consumeToken();
+    }
+    Lines.push_back(Line);
+  }
+
+  assert(Tok.is(tok::verbatim_block_end));
+  VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
+                                  Tok.getVerbatimBlockName(),
+                                  copyArray(llvm::makeArrayRef(Lines)));
+  consumeToken();
+
+  return VB;
+}
+
+/// Parses a verbatim line command and the text that follows it on the same
+/// line.  When no tok::verbatim_line_text token follows, the text is empty and
+/// its location is the end of the command name.
+VerbatimLineComment *Parser::parseVerbatimLine() {
+  assert(Tok.is(tok::verbatim_line_name));
+
+  Token NameTok = Tok;
+  consumeToken();
+
+  // Defaults for the case where the command comes just before a newline or
+  // the end of the comment.
+  SourceLocation TextBegin = NameTok.getEndLocation();
+  StringRef Text = "";
+  if (Tok.is(tok::verbatim_line_text)) {
+    TextBegin = Tok.getLocation();
+    Text = Tok.getVerbatimLineText();
+  }
+
+  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
+                                                NameTok.getVerbatimLineName(),
+                                                TextBegin,
+                                                Text);
+  // NOTE(review): the current token is consumed whether or not it was the
+  // text token -- confirm this is intended.
+  consumeToken();
+  return VL;
+}
+
+/// Parses one piece of block content, selected by the current token: a
+/// paragraph or block command, a verbatim block, or a verbatim line.  Must not
+/// be called at end of comment, on a newline, or on a token that only occurs
+/// inside verbatim content or HTML tags.
+BlockContentComment *Parser::parseBlockContent() {
+  switch (Tok.getKind()) {
+  case tok::text:
+  case tok::command:
+  case tok::html_tag_open:
+  case tok::html_tag_close:
+    return parseParagraphOrBlockCommand();
+
+  case tok::verbatim_block_begin:
+    return parseVerbatimBlock();
+
+  case tok::verbatim_line_name:
+    return parseVerbatimLine();
+
+  case tok::eof:
+  case tok::newline:
+  case tok::verbatim_block_line:
+  case tok::verbatim_block_end:
+  case tok::verbatim_line_text:
+  case tok::html_ident:
+  case tok::html_equals:
+  case tok::html_quoted_string:
+  case tok::html_greater:
+    llvm_unreachable("should not see this token");
+  }
+  // Keeps control from falling off the end of a non-void function if the
+  // token kind holds an unexpected value.
+  llvm_unreachable("bogus token kind");
+}
+
+/// Parses a whole comment: a sequence of block content items separated by any
+/// number of newlines, up to the end-of-comment token.
+FullComment *Parser::parseFullComment() {
+  SmallVector<BlockContentComment *, 8> Blocks;
+  for (;;) {
+    // Skip newlines at the beginning of the comment and extra newlines after
+    // each block.
+    while (Tok.is(tok::newline))
+      consumeToken();
+
+    if (Tok.is(tok::eof))
+      break;
+
+    Blocks.push_back(parseBlockContent());
+  }
+  return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
+}
+
+} // end namespace comments
+} // end namespace clang
+
+

diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp
new file mode 100644
index 0000000..1193e04
--- /dev/null
+++ b/lib/AST/CommentSema.cpp

@@ -0,0 +1,268 @@
+//===--- CommentSema.cpp - Doxygen comment semantic analysis --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentSema.h"
+#include "llvm/ADT/StringSwitch.h"
+
+namespace clang {
+namespace comments {
+
+/// Construct a Sema that keeps a reference to the given bump allocator; the
+/// actOn* callbacks in this file allocate their objects from it.
+Sema::Sema(llvm::BumpPtrAllocator &Allocator)
+    : Allocator(Allocator) {}
+
+/// Allocate a ParagraphComment built from the given inline content.
+ParagraphComment *
+Sema::actOnParagraphComment(ArrayRef<InlineContentComment *> Content) {
+  ParagraphComment *Paragraph = new (Allocator) ParagraphComment(Content);
+  return Paragraph;
+}
+
+/// Allocate a BlockCommandComment from the command's begin/end locations and
+/// its name.  Arguments and the paragraph are attached later by
+/// actOnBlockCommandArgs() and actOnBlockCommandFinish().
+BlockCommandComment *
+Sema::actOnBlockCommandStart(SourceLocation LocBegin, SourceLocation LocEnd,
+                             StringRef Name) {
+  BlockCommandComment *Command =
+      new (Allocator) BlockCommandComment(LocBegin, LocEnd, Name);
+  return Command;
+}
+
+/// Store the parsed arguments on an already created block command and hand
+/// the same command back to the caller.
+BlockCommandComment *
+Sema::actOnBlockCommandArgs(BlockCommandComment *Command,
+                            ArrayRef<BlockCommandComment::Argument> Args) {
+  Command->setArgs(Args);
+  return Command;
+}
+
+/// Attach the paragraph to an already created block command and hand the
+/// same command back to the caller.
+BlockCommandComment *
+Sema::actOnBlockCommandFinish(BlockCommandComment *Command,
+                              ParagraphComment *Paragraph) {
+  Command->setParagraph(Paragraph);
+  return Command;
+}
+
+/// Allocate a ParamCommandComment from the command's begin/end locations and
+/// its name.  Its arguments and paragraph are filled in later by
+/// actOnParamCommandArg() and actOnParamCommandFinish().
+ParamCommandComment *
+Sema::actOnParamCommandStart(SourceLocation LocBegin, SourceLocation LocEnd,
+                             StringRef Name) {
+  ParamCommandComment *Command =
+      new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name);
+  return Command;
+}
+
+/// Handle one argument of a parameter command.
+///
+/// If IsDirection is true, Arg is interpreted as a passing direction: it is
+/// lowercased, whitespace is ignored, and "[in]", "[out]", "[in,out]" and
+/// "[out,in]" are recognized.  Anything else falls back to 'In'.  The
+/// direction is stored on Command and marked explicit in every case.
+///
+/// Otherwise Arg is stored as the command's single argument (with the source
+/// range ArgLocBegin..ArgLocEnd), but only if the command has no arguments
+/// yet; an implicit 'In' direction is set first when no explicit direction
+/// was given.
+///
+/// Returns Command.
+ParamCommandComment *Sema::actOnParamCommandArg(ParamCommandComment *Command,
+                                                SourceLocation ArgLocBegin,
+                                                SourceLocation ArgLocEnd,
+                                                StringRef Arg,
+                                                bool IsDirection) {
+  if (!IsDirection) {
+    // Only the first non-direction argument is stored.
+    if (Command->getArgCount() != 0) {
+      // Diag() \\param command requires at most 2 arguments
+      return Command;
+    }
+
+    // User didn't provide a direction argument.
+    if (!Command->isDirectionExplicit())
+      Command->setDirection(ParamCommandComment::In, /* Explicit = */ false);
+
+    typedef BlockCommandComment::Argument Argument;
+    const SourceRange ArgRange(ArgLocBegin, ArgLocEnd);
+    Argument *NameArg = new (Allocator) Argument(ArgRange, Arg);
+    Command->setArgs(llvm::makeArrayRef(NameArg, 1));
+    // if (...) Diag() unrecognized parameter name
+    return Command;
+  }
+
+  // TODO: optimize: lower Name first (need an API in SmallString for that),
+  // after that StringSwitch.
+  const std::string Lowered = Arg.lower();
+
+  // Drop all whitespace.  A spelling that already matches contains none, so
+  // matching the compacted copy gives the same answer as trying the verbatim
+  // spelling first and the compacted one second.
+  std::string Compact;
+  Compact.reserve(Lowered.size());
+  for (std::string::size_type I = 0, E = Lowered.size(); I != E; ++I) {
+    switch (Lowered[I]) {
+    case ' ':
+    case '\n':
+    case '\r':
+    case '\t':
+    case '\v':
+    case '\f':
+      break;
+    default:
+      Compact.push_back(Lowered[I]);
+      break;
+    }
+  }
+
+  bool Recognized = true;
+  ParamCommandComment::PassDirection Direction = ParamCommandComment::In;
+  if (Compact == "[in]")
+    Direction = ParamCommandComment::In;
+  else if (Compact == "[out]")
+    Direction = ParamCommandComment::Out;
+  else if (Compact == "[in,out]" || Compact == "[out,in]")
+    Direction = ParamCommandComment::InOut;
+  else
+    Recognized = false; // Keep the 'In' fallback.
+
+  // True exactly when the spelling only matched once whitespace was removed.
+  // Not consumed yet; kept for the diagnostics sketched below.
+  const bool RemovingWhitespaceHelped =
+      Recognized && Compact.size() != Lowered.size();
+  (void)RemovingWhitespaceHelped;
+  // Diag() unrecognized parameter passing direction, valid directions are ...
+  // if (RemovingWhitespaceHelped) FixIt
+
+  Command->setDirection(Direction, /* Explicit = */ true);
+  return Command;
+}
+
+/// Attach the paragraph to an already created parameter command and hand the
+/// same command back to the caller.
+ParamCommandComment *
+Sema::actOnParamCommandFinish(ParamCommandComment *Command,
+                              ParagraphComment *Paragraph) {
+  Command->setParagraph(Paragraph);
+  return Command;
+}
+
+/// Allocate an InlineCommandComment for a command given without an argument;
+/// a default-constructed argument list is passed to the node.
+InlineCommandComment *
+Sema::actOnInlineCommand(SourceLocation CommandLocBegin,
+                         SourceLocation CommandLocEnd,
+                         StringRef CommandName) {
+  ArrayRef<InlineCommandComment::Argument> NoArgs;
+  InlineCommandComment *Command = new (Allocator)
+      InlineCommandComment(CommandLocBegin, CommandLocEnd, CommandName, NoArgs);
+  return Command;
+}
+
+/// Allocate an InlineCommandComment for a command given with exactly one
+/// argument.
+///
+/// The argument (its source range ArgLocBegin..ArgLocEnd plus its text) is
+/// allocated separately and passed to the command as a one-element array.
+InlineCommandComment *
+Sema::actOnInlineCommand(SourceLocation CommandLocBegin,
+                         SourceLocation CommandLocEnd,
+                         StringRef CommandName,
+                         SourceLocation ArgLocBegin,
+                         SourceLocation ArgLocEnd,
+                         StringRef Arg) {
+  typedef InlineCommandComment::Argument Argument;
+
+  const SourceRange ArgRange(ArgLocBegin, ArgLocEnd);
+  Argument *OnlyArg = new (Allocator) Argument(ArgRange, Arg);
+
+  InlineCommandComment *Command = new (Allocator)
+      InlineCommandComment(CommandLocBegin, CommandLocEnd, CommandName,
+                           llvm::makeArrayRef(OnlyArg, 1));
+  return Command;
+}
+
+/// Represent an unknown command as an InlineCommandComment with a
+/// default-constructed argument list, returned as inline content.
+InlineContentComment *
+Sema::actOnUnknownCommand(SourceLocation LocBegin, SourceLocation LocEnd,
+                          StringRef Name) {
+  ArrayRef<InlineCommandComment::Argument> NoArgs;
+  InlineCommandComment *Command =
+      new (Allocator) InlineCommandComment(LocBegin, LocEnd, Name, NoArgs);
+  return Command;
+}
+
+/// Allocate a TextComment from its begin/end locations and its text.
+TextComment *Sema::actOnText(SourceLocation LocBegin, SourceLocation LocEnd,
+                             StringRef Text) {
+  TextComment *Node = new (Allocator) TextComment(LocBegin, LocEnd, Text);
+  return Node;
+}
+
+/// Allocate a VerbatimBlockComment for the opening command called Name that
+/// starts at Loc.  Lines and the closing name are attached later by
+/// actOnVerbatimBlockFinish().
+VerbatimBlockComment *Sema::actOnVerbatimBlockStart(SourceLocation Loc,
+                                                    StringRef Name) {
+  // The second location is 1 + Name.size() characters past Loc.
+  // NOTE(review): the extra 1 presumably covers the command marker
+  // character in front of the name -- confirm against the lexer.
+  const SourceLocation NameEnd = Loc.getLocWithOffset(1 + Name.size());
+  VerbatimBlockComment *Block =
+      new (Allocator) VerbatimBlockComment(Loc, NameEnd, Name);
+  return Block;
+}
+
+/// Allocate a VerbatimBlockLineComment from a location and the line's text.
+VerbatimBlockLineComment *
+Sema::actOnVerbatimBlockLine(SourceLocation Loc, StringRef Text) {
+  VerbatimBlockLineComment *Line =
+      new (Allocator) VerbatimBlockLineComment(Loc, Text);
+  return Line;
+}
+
+/// Complete a verbatim block: record the closing command's name and location,
+/// then the collected lines, and hand the same block back to the caller.
+VerbatimBlockComment *
+Sema::actOnVerbatimBlockFinish(VerbatimBlockComment *Block,
+                               SourceLocation CloseNameLocBegin,
+                               StringRef CloseName,
+                               ArrayRef<VerbatimBlockLineComment *> Lines) {
+  Block->setCloseName(CloseName, CloseNameLocBegin);
+  Block->setLines(Lines);
+  return Block;
+}
+
+/// Allocate a VerbatimLineComment for the command called Name that starts at
+/// LocBegin, with its text starting at TextBegin.
+VerbatimLineComment *Sema::actOnVerbatimLine(SourceLocation LocBegin,
+                                             StringRef Name,
+                                             SourceLocation TextBegin,
+                                             StringRef Text) {
+  // The second location passed to the node is Text.size() characters past
+  // TextBegin.
+  const SourceLocation TextEnd = TextBegin.getLocWithOffset(Text.size());
+  VerbatimLineComment *Line = new (Allocator)
+      VerbatimLineComment(LocBegin, TextEnd, Name, TextBegin, Text);
+  return Line;
+}
+
+/// Allocate an HTMLOpenTagComment from its begin location and tag name.
+/// Attributes and the location of '>' are attached later by
+/// actOnHTMLOpenTagFinish().
+HTMLOpenTagComment *
+Sema::actOnHTMLOpenTagStart(SourceLocation LocBegin, StringRef TagName) {
+  HTMLOpenTagComment *Tag = new (Allocator) HTMLOpenTagComment(LocBegin, TagName);
+  return Tag;
+}
+
+/// Complete an HTML opening tag: record its attributes, then the location of
+/// the closing '>', and hand the same tag back to the caller.
+HTMLOpenTagComment *
+Sema::actOnHTMLOpenTagFinish(HTMLOpenTagComment *Tag,
+                             ArrayRef<HTMLOpenTagComment::Attribute> Attrs,
+                             SourceLocation GreaterLoc) {
+  Tag->setAttrs(Attrs);
+  Tag->setGreaterLoc(GreaterLoc);
+  return Tag;
+}
+
+/// Allocate an HTMLCloseTagComment from its begin/end locations and tag name.
+HTMLCloseTagComment *
+Sema::actOnHTMLCloseTag(SourceLocation LocBegin, SourceLocation LocEnd,
+                        StringRef TagName) {
+  HTMLCloseTagComment *Tag =
+      new (Allocator) HTMLCloseTagComment(LocBegin, LocEnd, TagName);
+  return Tag;
+}
+
+/// Allocate the FullComment node built from the given block content.
+FullComment *
+Sema::actOnFullComment(ArrayRef<BlockContentComment *> Blocks) {
+  FullComment *Comment = new (Allocator) FullComment(Blocks);
+  return Comment;
+}
+
+// TODO: tablegen
+/// Return true if Name is one of the block commands listed below, or if it is
+/// a parameter command according to isParamCommand().
+bool Sema::isBlockCommand(StringRef Name) {
+  const bool IsPlainBlockCommand = llvm::StringSwitch<bool>(Name)
+      .Case("author", true)
+      .Case("authors", true)
+      .Case("brief", true)
+      .Case("post", true)
+      .Case("pre", true)
+      .Case("result", true)
+      .Case("return", true)
+      .Case("returns", true)
+      .Default(false);
+
+  // isParamCommand() is consulted only when the table above did not match.
+  return IsPlainBlockCommand || isParamCommand(Name);
+}
+
+/// Return true if Name is "param" or "arg".
+bool Sema::isParamCommand(StringRef Name) {
+  const bool IsParam = llvm::StringSwitch<bool>(Name)
+      .Case("arg", true)
+      .Case("param", true)
+      .Default(false);
+  return IsParam;
+}
+
+/// Return the number of arguments for the block command called Name.
+///
+/// Every command listed here, and the default for any other name, currently
+/// yields 0.
+unsigned Sema::getBlockCommandNumArgs(StringRef Name) {
+  const unsigned NumArgs = llvm::StringSwitch<unsigned>(Name)
+      .Case("author", 0)
+      .Case("authors", 0)
+      .Case("brief", 0)
+      .Case("post", 0)
+      .Case("pre", 0)
+      .Default(0);
+  return NumArgs;
+}
+
+/// Return true if Name is "c" or "em".
+bool Sema::isInlineCommand(StringRef Name) {
+  const bool IsInline = llvm::StringSwitch<bool>(Name)
+      .Case("em", true)
+      .Case("c", true)
+      .Default(false);
+  return IsInline;
+}
+
+/// Return true if an HTML opening tag called Name needs a matching closing
+/// tag.
+///
+/// "br" is an HTML void element and never has a closing tag, so it yields
+/// false; every other tag name yields true.  (Previously the "br" case
+/// returned the same value as the default, so the function was always true.)
+bool Sema::HTMLOpenTagNeedsClosing(StringRef Name) {
+  return llvm::StringSwitch<bool>(Name)
+      .Case("br", false)
+      .Default(true);
+}
+
+} // end namespace comments
+} // end namespace clang
+
commit	8d3ba23f2d9e6c87794d059412a0808c9cbacb25	[log] [tgz]
author	Dmitri Gribenko <gribozavr@gmail.com>	Fri Jul 06 00:28:32 2012 +0000
committer	Dmitri Gribenko <gribozavr@gmail.com>	Fri Jul 06 00:28:32 2012 +0000
tree	c72c618faeffa1c098c4df33857bd12a72c62fb1
parent	1838703fea568b394407b83d1055b4c7f52fb105 [diff]