Implement AST classes for comments, a real parser for Doxygen comments, and a
very simple semantic analysis that just builds the AST. Also make minor changes
to the lexer so that it picks up source locations I didn't think about before.
The comment AST is modelled along the ideas of an HTML AST: it distinguishes
block content and inline content.
* Block content is a paragraph, a command that has a paragraph as an argument,
or a verbatim command.
* Inline content is placed within some block. Inline content includes plain
text, inline commands, and HTML as tag soup.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 66096f3..f65f9c0 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -13,6 +13,9 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
+#include "clang/AST/CommentLexer.h"
+#include "clang/AST/CommentSema.h"
+#include "clang/AST/CommentParser.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
@@ -149,18 +152,47 @@
const RawComment *ASTContext::getRawCommentForDecl(const Decl *D) const {
// Check whether we have cached a comment string for this declaration
// already.
- llvm::DenseMap<const Decl *, const RawComment *>::iterator Pos
+ llvm::DenseMap<const Decl *, RawAndParsedComment>::iterator Pos
= DeclComments.find(D);
- if (Pos != DeclComments.end())
- return Pos->second;
+ if (Pos != DeclComments.end()) {
+ RawAndParsedComment C = Pos->second;
+ return C.first;
+ }
const RawComment *RC = getRawCommentForDeclNoCache(D);
// If we found a comment, it should be a documentation comment.
assert(!RC || RC->isDocumentation());
- DeclComments[D] = RC;
+ DeclComments[D] = RawAndParsedComment(RC, NULL);
return RC;
}
+comments::FullComment *ASTContext::getCommentForDecl(const Decl *D) const {
+  llvm::DenseMap<const Decl *, RawAndParsedComment>::iterator Pos
+      = DeclComments.find(D);
+  const RawComment *RC;
+  if (Pos != DeclComments.end()) {
+    RawAndParsedComment C = Pos->second;
+    if (comments::FullComment *FC = C.second)
+      return FC; // Already parsed for this declaration; reuse the cached AST.
+    RC = C.first;
+  } else
+    RC = getRawCommentForDecl(D);
+  // A null raw comment means there is nothing to parse for D.
+  if (!RC)
+    return NULL;
+  // Lex the raw comment text, starting at the comment's begin location.
+  const StringRef RawText = RC->getRawText(SourceMgr);
+  comments::Lexer L(RC->getSourceRange().getBegin(), comments::CommentOptions(),
+                    RawText.begin(), RawText.end());
+
+  comments::Sema S(this->BumpAlloc);
+  comments::Parser P(L, S, this->BumpAlloc);
+  // Parse once and store the result next to the raw comment for later calls.
+  comments::FullComment *FC = P.parseFullComment();
+  DeclComments[D].second = FC;
+  return FC;
+}
+
void
ASTContext::CanonicalTemplateTemplateParm::Profile(llvm::FoldingSetNodeID &ID,
TemplateTemplateParmDecl *Parm) {
diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt
index 5dad60c..c45f721 100644
--- a/lib/AST/CMakeLists.txt
+++ b/lib/AST/CMakeLists.txt
@@ -8,8 +8,12 @@
ASTImporter.cpp
AttrImpl.cpp
CXXInheritance.cpp
+ Comment.cpp
CommentBriefParser.cpp
+ CommentDumper.cpp
CommentLexer.cpp
+ CommentParser.cpp
+ CommentSema.cpp
Decl.cpp
DeclarationName.cpp
DeclBase.cpp
@@ -60,6 +64,7 @@
ClangAttrList
ClangAttrImpl
ClangDiagnosticAST
+ ClangCommentNodes
ClangDeclNodes
ClangStmtNodes
)
diff --git a/lib/AST/Comment.cpp b/lib/AST/Comment.cpp
new file mode 100644
index 0000000..22277ad
--- /dev/null
+++ b/lib/AST/Comment.cpp
@@ -0,0 +1,90 @@
+//===--- Comment.cpp - Comment AST node implementation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/Comment.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+namespace comments {
+
+const char *Comment::getCommentKindName() const {
+  switch (getCommentKind()) { // one case per COMMENT entry of CommentNodes.inc
+  case NoCommentKind: return "NoCommentKind";
+#define ABSTRACT_COMMENT(COMMENT) // defined empty: contributes no case
+#define COMMENT(CLASS, PARENT) \
+  case CLASS##Kind: \
+    return #CLASS; // the class name, stringized
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+  }
+  llvm_unreachable("Unknown comment kind!"); // the kind matched no case above
+}
+
+namespace {
+struct good {}; // result tag of the template overload below
+struct bad {}; // result tag of the Comment-only overload below
+// Accepts a pointer to a const member function of any class T.
+template <typename T>
+good implements_child_begin_end(Comment::child_iterator (T::*)() const) {
+  return good();
+}
+// Members of Comment itself: this non-template overload beats the template.
+static inline bad implements_child_begin_end(
+                      Comment::child_iterator (Comment::*)() const) {
+  return bad();
+}
+// Compiles only if the call yields 'good'; sizeof keeps it unevaluated.
+#define ASSERT_IMPLEMENTS_child_begin(function) \
+  (void) sizeof(good(implements_child_begin_end(function)))
+// Applies the check to child_begin and child_end of every COMMENT class.
+static inline void CheckCommentASTNodes() {
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+  ASSERT_IMPLEMENTS_child_begin(&CLASS::child_begin); \
+  ASSERT_IMPLEMENTS_child_begin(&CLASS::child_end);
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+}
+
+#undef ASSERT_IMPLEMENTS_child_begin
+
+} // end unnamed namespace
+
+Comment::child_iterator Comment::child_begin() const {
+  switch (getCommentKind()) { // forward to the concrete class for this kind
+  case NoCommentKind: llvm_unreachable("comment without a kind");
+#define ABSTRACT_COMMENT(COMMENT) // defined empty: contributes no case
+#define COMMENT(CLASS, PARENT) \
+  case CLASS##Kind: \
+    return static_cast<const CLASS *>(this)->child_begin();
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+  } // NOTE(review): no return or unreachable marker follows the switch
+}
+
+Comment::child_iterator Comment::child_end() const {
+  switch (getCommentKind()) { // forward to the concrete class for this kind
+  case NoCommentKind: llvm_unreachable("comment without a kind");
+#define ABSTRACT_COMMENT(COMMENT) // defined empty: contributes no case
+#define COMMENT(CLASS, PARENT) \
+  case CLASS##Kind: \
+    return static_cast<const CLASS *>(this)->child_end();
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+  } // NOTE(review): no return or unreachable marker follows the switch
+}
+
+
+} // end namespace comments
+} // end namespace clang
+
diff --git a/lib/AST/CommentDumper.cpp b/lib/AST/CommentDumper.cpp
new file mode 100644
index 0000000..fd7a394
--- /dev/null
+++ b/lib/AST/CommentDumper.cpp
@@ -0,0 +1,206 @@
+//===--- CommentDumper.cpp - Dumping implementation for Comment ASTs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace comments {
+
+namespace {
+class CommentDumper: public comments::ConstCommentVisitor<CommentDumper> {
+  raw_ostream &OS; // stream that receives the dump
+  SourceManager *SM; // may be null, in which case locations are not printed
+  unsigned IndentLevel; // current nesting depth, maintained by dumpSubtree
+
+public:
+  CommentDumper(raw_ostream &OS, SourceManager *SM) :
+      OS(OS), SM(SM), IndentLevel(0)
+  { }
+
+  void dumpIndent() const {
+    for (unsigned i = 1, e = IndentLevel; i < e; ++i) // none at depth 0 and 1
+      OS << " ";
+  }
+
+  void dumpLocation(SourceLocation Loc) {
+    if (SM) // printing a location needs a SourceManager
+      Loc.print(OS, *SM);
+  }
+  // Prints the node's source range in angle brackets.
+  void dumpSourceRange(const Comment *C);
+  // Prints the part common to all nodes: indent, kind name, address, range.
+  void dumpComment(const Comment *C);
+  // Dumps C and, recursively, its children; accepts a null C.
+  void dumpSubtree(const Comment *C);
+
+  // Inline content.
+  void visitTextComment(const TextComment *C);
+  void visitInlineCommandComment(const InlineCommandComment *C);
+  void visitHTMLOpenTagComment(const HTMLOpenTagComment *C);
+  void visitHTMLCloseTagComment(const HTMLCloseTagComment *C);
+
+  // Block content.
+  void visitParagraphComment(const ParagraphComment *C);
+  void visitBlockCommandComment(const BlockCommandComment *C);
+  void visitParamCommandComment(const ParamCommandComment *C);
+  void visitVerbatimBlockComment(const VerbatimBlockComment *C);
+  void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C);
+  void visitVerbatimLineComment(const VerbatimLineComment *C);
+
+  void visitFullComment(const FullComment *C);
+};
+
+// Prints " <begin[, end]>" for C; prints nothing without a SourceManager.
+void CommentDumper::dumpSourceRange(const Comment *C) {
+  if (!SM)
+    return;
+  const SourceRange Range = C->getSourceRange();
+  const SourceLocation Begin = Range.getBegin(), End = Range.getEnd();
+  OS << " <";
+  dumpLocation(Begin);
+  if (Begin != End) {
+    OS << ", ";
+    dumpLocation(End);
+  }
+  OS << ">";
+}
+
+void CommentDumper::dumpComment(const Comment *C) {
+  dumpIndent(); // indentation for the current nesting depth
+  OS << "(" << C->getCommentKindName()
+     << " " << (void *) C; // the node's address
+  dumpSourceRange(C); // the matching ')' is written by dumpSubtree
+}
+
+void CommentDumper::dumpSubtree(const Comment *C) {
+  ++IndentLevel; // everything dumped below is one level deeper
+  if (C) {
+    visit(C); // presumably dispatches to the matching visit*Comment method
+    for (Comment::child_iterator I = C->child_begin(),
+                                 E = C->child_end();
+         I != E; ++I) {
+      OS << '\n'; // each child starts on its own line
+      dumpSubtree(*I);
+    }
+    OS << ')'; // closes the '(' written by dumpComment
+  } else {
+    dumpIndent();
+    OS << "<<<NULL>>>"; // placeholder for a null node
+  }
+  --IndentLevel;
+}
+
+void CommentDumper::visitTextComment(const TextComment *C) {
+  dumpComment(C);
+  // After the common header, append the node's text in double quotes.
+  OS << " Text=\"" << C->getText() << "\"";
+}
+
+void CommentDumper::visitInlineCommandComment(const InlineCommandComment *C) {
+  dumpComment(C);
+  // Append every argument as Arg[index]="text".
+  for (unsigned i = 0, e = C->getArgCount(); i != e; ++i)
+    OS << " Arg[" << i << "]=\"" << C->getArgText(i) << "\"";
+}
+
+void CommentDumper::visitHTMLOpenTagComment(const HTMLOpenTagComment *C) {
+  dumpComment(C);
+  // Append the tag name and, when there are any, the attributes.
+  OS << " Name=\"" << C->getTagName() << "\"";
+  if (C->getAttrCount() != 0) {
+    OS << " Attrs: ";
+    for (unsigned i = 0, e = C->getAttrCount(); i != e; ++i) {
+      const HTMLOpenTagComment::Attribute &Attr = C->getAttr(i);
+      OS << " \"" << Attr.Name << "=\"" << Attr.Value << "\""; // "name="value"
+    }
+  }
+}
+
+void CommentDumper::visitHTMLCloseTagComment(const HTMLCloseTagComment *C) {
+  dumpComment(C);
+  // After the common header, append the tag name in double quotes.
+  OS << " Name=\"" << C->getTagName() << "\"";
+}
+
+void CommentDumper::visitParagraphComment(const ParagraphComment *C) {
+  dumpComment(C); // only the common header; children come from dumpSubtree
+}
+
+void CommentDumper::visitBlockCommandComment(const BlockCommandComment *C) {
+  dumpComment(C);
+  // After the common header, append the command name in double quotes.
+  OS << " Name=\"" << C->getCommandName() << "\"";
+}
+
+void CommentDumper::visitParamCommandComment(const ParamCommandComment *C) {
+  dumpComment(C);
+  // Parameter passing direction.
+  switch (C->getDirection()) {
+  case ParamCommandComment::In:
+    OS << " [in]";
+    break;
+  case ParamCommandComment::Out:
+    OS << " [out]";
+    break;
+  case ParamCommandComment::InOut:
+    OS << " [in,out]";
+    break;
+  }
+  // Whether the direction is flagged as explicit on the node.
+  if (C->isDirectionExplicit())
+    OS << " explicitly";
+  else
+    OS << " implicitly";
+  // The parameter name is printed only when the command has one.
+  if (C->hasParamName()) {
+    OS << " Param=\"" << C->getParamName() << "\"";
+  }
+}
+
+void CommentDumper::visitVerbatimBlockComment(const VerbatimBlockComment *C) {
+  dumpComment(C);
+  // Append the names of the opening and the closing command.
+  OS << " Name=\"" << C->getCommandName() << "\""
+        " CloseName=\"" << C->getCloseName() << "\"";
+}
+
+void CommentDumper::visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C) {
+  dumpComment(C);
+  // After the common header, append the line's text in double quotes.
+  OS << " Text=\"" << C->getText() << "\"";
+}
+
+void CommentDumper::visitVerbatimLineComment(const VerbatimLineComment *C) {
+  dumpComment(C);
+  // After the common header, append the line's text in double quotes.
+  OS << " Text=\"" << C->getText() << "\"";
+}
+
+void CommentDumper::visitFullComment(const FullComment *C) {
+  dumpComment(C); // only the common header; children come from dumpSubtree
+}
+
+} // unnamed namespace
+
+void Comment::dump() const {
+  CommentDumper D(llvm::errs(), NULL); // no SourceManager: locations omitted
+  D.dumpSubtree(this);
+  llvm::errs() << '\n'; // terminate the dump with a newline
+}
+
+void Comment::dump(SourceManager &SM) const {
+  CommentDumper D(llvm::errs(), &SM); // with SM, source ranges are printed too
+  D.dumpSubtree(this);
+  llvm::errs() << '\n'; // terminate the dump with a newline
+}
+
+} // end namespace comments
+} // end namespace clang
+
diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp
index c3a801d..77d2a9b 100644
--- a/lib/AST/CommentLexer.cpp
+++ b/lib/AST/CommentLexer.cpp
@@ -122,6 +122,7 @@
}
namespace {
+/// Returns pointer to the first newline character in the string.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
const char C = *BufferPtr;
@@ -270,6 +271,9 @@
case LS_HTMLOpenTag:
lexHTMLOpenTag(T);
return;
+ case LS_HTMLCloseTag:
+ lexHTMLCloseTag(T);
+ return;
}
assert(State == LS_Normal);
@@ -356,7 +360,7 @@
if (isHTMLIdentifierCharacter(C))
setupAndLexHTMLOpenTag(T);
else if (C == '/')
- lexHTMLCloseTag(T);
+ setupAndLexHTMLCloseTag(T);
else {
StringRef Text(BufferPtr, TokenPtr - BufferPtr);
formTokenWithChars(T, TokenPtr, tok::text);
@@ -404,6 +408,18 @@
formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
T.setVerbatimBlockName(Name);
+  // If there is a newline following the verbatim opening command, skip the
+  // newline so that we don't create a tok::verbatim_block_line with empty
+  // text content.
+ if (BufferPtr != CommentEnd) {
+ const char C = *BufferPtr;
+ if (C == '\n' || C == '\r') {
+ BufferPtr = skipNewline(BufferPtr, CommentEnd);
+ State = LS_VerbatimBlockBody;
+ return;
+ }
+ }
+
State = LS_VerbatimBlockFirstLine;
}
@@ -419,9 +435,11 @@
// Look for end command in current line.
size_t Pos = Line.find(VerbatimBlockEndCommandName);
+ const char *TextEnd;
const char *NextLine;
if (Pos == StringRef::npos) {
// Current line is completely verbatim.
+ TextEnd = Newline;
NextLine = skipNewline(Newline, CommentEnd);
} else if (Pos == 0) {
// Current line contains just an end command.
@@ -433,10 +451,11 @@
return;
} else {
// There is some text, followed by end command. Extract text first.
- NextLine = BufferPtr + Pos;
+ TextEnd = BufferPtr + Pos;
+ NextLine = TextEnd;
}
- StringRef Text(BufferPtr, NextLine - BufferPtr);
+ StringRef Text(BufferPtr, TextEnd - BufferPtr);
formTokenWithChars(T, NextLine, tok::verbatim_block_line);
T.setVerbatimBlockText(Text);
@@ -542,18 +561,26 @@
}
}
-void Lexer::lexHTMLCloseTag(Token &T) {
+void Lexer::setupAndLexHTMLCloseTag(Token &T) {
assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
const char *End = skipWhitespace(TagNameEnd, CommentEnd);
- if (End != CommentEnd && *End == '>')
- End++;
formTokenWithChars(T, End, tok::html_tag_close);
T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
+
+ if (BufferPtr != CommentEnd && *BufferPtr == '>')
+ State = LS_HTMLCloseTag;
+}
+
+void Lexer::lexHTMLCloseTag(Token &T) {
+ assert(BufferPtr != CommentEnd && *BufferPtr == '>');
+
+ formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
+ State = LS_Normal;
}
Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
@@ -595,7 +622,8 @@
BufferPtr++;
CommentState = LCS_InsideBCPLComment;
- State = LS_Normal;
+ if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
+ State = LS_Normal;
CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
goto again;
}
@@ -628,7 +656,7 @@
EndWhitespace++;
// Turn any whitespace between comments (and there is only whitespace
- // between them) into a newline. We have two newlines between comments
+ // between them) into a newline. We have two newlines between C comments
// in total (first one was synthesized after a comment).
formTokenWithChars(T, EndWhitespace, tok::newline);
diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp
new file mode 100644
index 0000000..701b6fa
--- /dev/null
+++ b/lib/AST/CommentParser.cpp
@@ -0,0 +1,414 @@
+//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentParser.h"
+#include "clang/AST/CommentSema.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+namespace comments {
+
+Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator):
+    L(L), S(S), Allocator(Allocator) {
+  consumeToken(); // presumably loads the first token into Tok -- confirm
+}
+
+ParamCommandComment *Parser::parseParamCommandArgs(
+    ParamCommandComment *PC,
+    TextTokenRetokenizer &Retokenizer) {
+  Token Arg;
+  // Check if argument looks like direction specification: [dir]
+  // e.g., [in], [out], [in,out]
+  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
+    PC = S.actOnParamCommandArg(PC,
+                                Arg.getLocation(),
+                                Arg.getEndLocation(),
+                                Arg.getText(),
+                                /* IsDirection = */ true);
+
+  // The parameter name is reported only if a word was actually lexed;
+  // otherwise Arg would not describe a parameter name at all.
+  if (Retokenizer.lexWord(Arg))
+    PC = S.actOnParamCommandArg(PC,
+                                Arg.getLocation(),
+                                Arg.getEndLocation(),
+                                Arg.getText(),
+                                /* IsDirection = */ false);
+  return PC;
+}
+
+BlockCommandComment *Parser::parseBlockCommandArgs(
+    BlockCommandComment *BC,
+    TextTokenRetokenizer &Retokenizer,
+    unsigned NumArgs) {
+  typedef BlockCommandComment::Argument Argument;
+  // Room for the maximum number of arguments; only the lexed prefix is used.
+  Argument *Args = new (Allocator) Argument[NumArgs];
+  unsigned ParsedArgs = 0;
+  // Take one word per argument until enough are read or the words run out.
+  for (Token Word; ParsedArgs < NumArgs && Retokenizer.lexWord(Word);
+       ++ParsedArgs) {
+    const SourceRange WordRange(Word.getLocation(), Word.getEndLocation());
+    Args[ParsedArgs] = Argument(WordRange, Word.getText());
+  }
+
+  return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
+}
+
+BlockCommandComment *Parser::parseBlockCommand() {
+  assert(Tok.is(tok::command));
+  // Exactly one of PC/BC is set below, depending on IsParam.
+  ParamCommandComment *PC = NULL;
+  BlockCommandComment *BC = NULL;
+  bool IsParam = false;
+  unsigned NumArgs = 0;
+  if (S.isParamCommand(Tok.getCommandName())) {
+    IsParam = true;
+    PC = S.actOnParamCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandName());
+  } else {
+    NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
+    BC = S.actOnBlockCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandName());
+  }
+  consumeToken();
+
+  if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
+    // Block command ahead. We can't nest block commands, so pretend that this
+    // command has an empty argument.
+    // TODO: Diag() Warn empty arg to block command
+    ParagraphComment *Paragraph = S.actOnParagraphComment(
+                                ArrayRef<InlineContentComment *>());
+    if (IsParam)
+      return S.actOnParamCommandFinish(PC, Paragraph);
+    return S.actOnBlockCommandFinish(BC, Paragraph);
+  }
+
+  if (IsParam || NumArgs > 0) {
+    // In order to parse command arguments we need to retokenize a few
+    // following text tokens.
+    TextTokenRetokenizer Retokenizer(Allocator);
+    while (Tok.is(tok::text)) {
+      if (Retokenizer.addToken(Tok))
+        consumeToken();
+    }
+    if (IsParam)
+      PC = parseParamCommandArgs(PC, Retokenizer);
+    else
+      BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
+
+    // Put back tokens we didn't use.
+    Token Text;
+    while (Retokenizer.lexText(Text))
+      putBack(Text);
+  }
+
+  BlockContentComment *Block = parseParagraphOrBlockCommand();
+  // Since we have checked for a block command, we should have parsed a
+  // paragraph.
+  if (IsParam)
+    return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
+  return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
+}
+
+InlineCommandComment *Parser::parseInlineCommand() {
+  assert(Tok.is(tok::command));
+  // Keep a copy of the command token before moving on to what follows it.
+  const Token CommandTok = Tok;
+  consumeToken();
+  // Feed the text tokens that follow into the retokenizer.
+  TextTokenRetokenizer Retokenizer(Allocator);
+  while (Tok.is(tok::text)) {
+    if (Retokenizer.addToken(Tok))
+      consumeToken();
+  }
+  // At most one word is taken as the command's argument.
+  Token ArgTok;
+  bool ArgTokValid = Retokenizer.lexWord(ArgTok);
+
+  InlineCommandComment *IC;
+  if (ArgTokValid) {
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandName(),
+                              ArgTok.getLocation(),
+                              ArgTok.getEndLocation(),
+                              ArgTok.getText());
+  } else {
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandName());
+  }
+  // Put back the text that was not used as the argument.
+  Token Text;
+  while (Retokenizer.lexText(Text))
+    putBack(Text);
+
+  return IC;
+}
+
+HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
+  assert(Tok.is(tok::html_tag_open));
+  HTMLOpenTagComment *HOT =
+      S.actOnHTMLOpenTagStart(Tok.getLocation(),
+                              Tok.getHTMLTagOpenName());
+  consumeToken();
+  // Collect attributes until '>' or a token that is not part of an open tag.
+  SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;
+  while (true) {
+    if (Tok.is(tok::html_ident)) {
+      Token Ident = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_equals)) { // attribute name without a value
+        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
+                                                       Ident.getHTMLIdent()));
+        continue;
+      }
+      Token Equals = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_quoted_string)) { // '=' but no value: name only
+        // TODO: Diag() expected quoted string
+        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
+                                                       Ident.getHTMLIdent()));
+        continue;
+      }
+      Attrs.push_back(HTMLOpenTagComment::Attribute(
+                              Ident.getLocation(),
+                              Ident.getHTMLIdent(),
+                              Equals.getLocation(),
+                              SourceRange(Tok.getLocation(),
+                                          Tok.getEndLocation()),
+                              Tok.getHTMLQuotedString()));
+      consumeToken();
+      continue;
+    } else if (Tok.is(tok::html_greater)) { // '>' finishes the tag normally
+      HOT = S.actOnHTMLOpenTagFinish(HOT,
+                                     copyArray(llvm::makeArrayRef(Attrs)),
+                                     Tok.getLocation());
+      consumeToken();
+      return HOT;
+    } else if (Tok.is(tok::html_equals) ||
+               Tok.is(tok::html_quoted_string)) {
+      // TODO: Diag() Err expected ident
+      while (Tok.is(tok::html_equals) ||
+             Tok.is(tok::html_quoted_string))
+        consumeToken(); // skip the stray '=' and string tokens
+    } else {
+      // Not a token from HTML open tag. Thus HTML tag prematurely ended.
+      // TODO: Diag() Err HTML tag prematurely ended
+      return S.actOnHTMLOpenTagFinish(HOT,
+                                      copyArray(llvm::makeArrayRef(Attrs)),
+                                      SourceLocation());
+    }
+  }
+}
+
+HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
+  assert(Tok.is(tok::html_tag_close));
+  Token TokTagOpen = Tok; // holds the html_tag_close token, despite its name
+  consumeToken();
+  SourceLocation Loc; // stays default-constructed when no '>' token follows
+  if (Tok.is(tok::html_greater)) {
+    Loc = Tok.getLocation();
+    consumeToken();
+  }
+
+  return S.actOnHTMLCloseTag(TokTagOpen.getLocation(),
+                             Loc,
+                             TokTagOpen.getHTMLTagCloseName());
+}
+
+BlockContentComment *Parser::parseParagraphOrBlockCommand() {
+  SmallVector<InlineContentComment *, 8> Content;
+  // In the switch, 'continue' keeps collecting and 'break' ends the paragraph.
+  while (true) {
+    switch (Tok.getKind()) {
+    case tok::verbatim_block_begin:
+    case tok::verbatim_line_name:
+    case tok::eof:
+      assert(Content.size() != 0);
+      break; // Block content or EOF ahead, finish this paragraph.
+
+    case tok::command:
+      if (S.isBlockCommand(Tok.getCommandName())) {
+        if (Content.size() == 0)
+          return parseBlockCommand();
+        break; // Block command ahead, finish this paragraph.
+      }
+      if (S.isInlineCommand(Tok.getCommandName())) {
+        Content.push_back(parseInlineCommand());
+        continue;
+      }
+
+      // Not a block command, not an inline command ==> an unknown command.
+      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
+                                              Tok.getEndLocation(),
+                                              Tok.getCommandName()));
+      consumeToken();
+      continue;
+
+    case tok::newline: {
+      consumeToken();
+      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
+        consumeToken();
+        break; // Two newlines -- end of paragraph.
+      }
+      if (Content.size() > 0)
+        Content.back()->addTrailingNewline();
+      continue;
+    }
+
+    // Don't deal with HTML tag soup now.
+    case tok::html_tag_open:
+      Content.push_back(parseHTMLOpenTag());
+      continue;
+
+    case tok::html_tag_close:
+      Content.push_back(parseHTMLCloseTag());
+      continue;
+
+    case tok::text:
+      Content.push_back(S.actOnText(Tok.getLocation(),
+                                    Tok.getEndLocation(),
+                                    Tok.getText()));
+      consumeToken();
+      continue;
+
+    case tok::verbatim_block_line:
+    case tok::verbatim_block_end:
+    case tok::verbatim_line_text:
+    case tok::html_ident:
+    case tok::html_equals:
+    case tok::html_quoted_string:
+    case tok::html_greater:
+      llvm_unreachable("should not see this token");
+    }
+    break; // reached through a 'break' in the switch: leave the loop
+  }
+
+  return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
+}
+
+VerbatimBlockComment *Parser::parseVerbatimBlock() {
+  assert(Tok.is(tok::verbatim_block_begin));
+  // Create the node from the opening command, then step past that token.
+  VerbatimBlockComment *VB =
+      S.actOnVerbatimBlockStart(Tok.getLocation(),
+                                Tok.getVerbatimBlockName());
+  consumeToken();
+
+  // Don't create an empty line if verbatim opening command is followed
+  // by a newline.
+  if (Tok.is(tok::newline))
+    consumeToken();
+
+  SmallVector<VerbatimBlockLineComment *, 8> Lines;
+  while (Tok.is(tok::verbatim_block_line) ||
+         Tok.is(tok::newline)) {
+    VerbatimBlockLineComment *Line;
+    if (Tok.is(tok::verbatim_block_line)) {
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
+                                      Tok.getVerbatimBlockText());
+      consumeToken();
+      if (Tok.is(tok::newline)) { // newline that ends this line of text
+        consumeToken();
+      }
+    } else {
+      // Empty line, just a tok::newline.
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
+                                      "");
+      consumeToken();
+    }
+    Lines.push_back(Line);
+  }
+  // NOTE(review): a block with no closing command is caught only by the assert.
+  assert(Tok.is(tok::verbatim_block_end));
+  VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
+                                  Tok.getVerbatimBlockName(),
+                                  copyArray(llvm::makeArrayRef(Lines)));
+  consumeToken();
+
+  return VB;
+}
+
+VerbatimLineComment *Parser::parseVerbatimLine() {
+  assert(Tok.is(tok::verbatim_line_name));
+  // Keep a copy of the name token before moving on to the line's text.
+  Token NameTok = Tok;
+  consumeToken();
+
+  SourceLocation TextBegin;
+  StringRef Text;
+  // Next token might not be a tok::verbatim_line_text if verbatim line
+  // starting command comes just before a newline or comment end.
+  if (Tok.is(tok::verbatim_line_text)) {
+    TextBegin = Tok.getLocation();
+    Text = Tok.getVerbatimLineText();
+  } else {
+    TextBegin = NameTok.getEndLocation();
+    Text = "";
+  }
+  // NOTE(review): consumeToken() below also drops a non-verbatim_line_text token.
+  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
+                                                NameTok.getVerbatimLineName(),
+                                                TextBegin,
+                                                Text);
+  consumeToken();
+  return VL;
+}
+
+BlockContentComment *Parser::parseBlockContent() {
+  switch (Tok.getKind()) { // the current token selects the kind of block
+  case tok::text:
+  case tok::command:
+  case tok::html_tag_open:
+  case tok::html_tag_close:
+    return parseParagraphOrBlockCommand();
+
+  case tok::verbatim_block_begin:
+    return parseVerbatimBlock();
+
+  case tok::verbatim_line_name:
+    return parseVerbatimLine();
+
+  case tok::eof: // parseFullComment skips newlines and stops at eof first
+  case tok::newline:
+  case tok::verbatim_block_line:
+  case tok::verbatim_block_end:
+  case tok::verbatim_line_text:
+  case tok::html_ident:
+  case tok::html_equals:
+  case tok::html_quoted_string:
+  case tok::html_greater:
+    llvm_unreachable("should not see this token");
+  }
+}
+
+FullComment *Parser::parseFullComment() {
+  // Skip newlines at the beginning of the comment.
+  while (Tok.is(tok::newline))
+    consumeToken();
+  // Parse one block after another until the end of the comment.
+  SmallVector<BlockContentComment *, 8> Blocks;
+  while (Tok.isNot(tok::eof)) {
+    Blocks.push_back(parseBlockContent());
+
+    // Skip extra newlines after paragraph end.
+    while (Tok.is(tok::newline))
+      consumeToken();
+  }
+  return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
+}
+
+} // end namespace comments
+} // end namespace clang
+
+
diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp
new file mode 100644
index 0000000..1193e04
--- /dev/null
+++ b/lib/AST/CommentSema.cpp
@@ -0,0 +1,268 @@
+//===--- CommentSema.cpp - Doxygen comment semantic analysis --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentSema.h"
+#include "llvm/ADT/StringSwitch.h"
+
+namespace clang {
+namespace comments {
+
+Sema::Sema(llvm::BumpPtrAllocator &Allocator) :
+    Allocator(Allocator) { // the actOn* methods create their nodes from it
+}
+
+ParagraphComment *Sema::actOnParagraphComment(
+                              ArrayRef<InlineContentComment *> Content) {
+  return new (Allocator) ParagraphComment(Content); // Content passed as given
+}
+
+BlockCommandComment *Sema::actOnBlockCommandStart(SourceLocation LocBegin,
+                                                  SourceLocation LocEnd,
+                                                  StringRef Name) {
+  return new (Allocator) BlockCommandComment(LocBegin, LocEnd, Name); // no args or paragraph yet
+}
+
+BlockCommandComment *Sema::actOnBlockCommandArgs(
+                              BlockCommandComment *Command,
+                              ArrayRef<BlockCommandComment::Argument> Args) {
+  Command->setArgs(Args); // attach the parsed word arguments
+  return Command; // the same node that was passed in
+}
+
+BlockCommandComment *Sema::actOnBlockCommandFinish(
+                              BlockCommandComment *Command,
+                              ParagraphComment *Paragraph) {
+  Command->setParagraph(Paragraph); // the paragraph argument of the command
+  return Command; // the same node that was passed in
+}
+
+ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin,
+                                                  SourceLocation LocEnd,
+                                                  StringRef Name) {
+  return new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name); // args are added by actOnParamCommandArg
+}
+
+ParamCommandComment *Sema::actOnParamCommandArg(ParamCommandComment *Command,
+                                                SourceLocation ArgLocBegin,
+                                                SourceLocation ArgLocEnd,
+                                                StringRef Arg,
+                                                bool IsDirection) {
+  if (IsDirection) { // Arg is the bracketed direction, e.g. "[in]"
+    ParamCommandComment::PassDirection Direction;
+    std::string ArgLower = Arg.lower(); // compare case-insensitively
+    // TODO: optimize: lower Name first (need an API in SmallString for that),
+    // after that StringSwitch.
+    if (ArgLower == "[in]")
+      Direction = ParamCommandComment::In;
+    else if (ArgLower == "[out]")
+      Direction = ParamCommandComment::Out;
+    else if (ArgLower == "[in,out]" || ArgLower == "[out,in]")
+      Direction = ParamCommandComment::InOut;
+    else {
+      // Remove all whitespace characters in place, then try the match again.
+      std::string::iterator O = ArgLower.begin();
+      for (std::string::iterator I = ArgLower.begin(), E = ArgLower.end();
+           I != E; ++I) {
+        const char C = *I;
+        if (C != ' ' && C != '\n' && C != '\r' &&
+            C != '\t' && C != '\v' && C != '\f')
+          *O++ = C;
+      }
+      ArgLower.resize(O - ArgLower.begin());
+
+      bool RemovingWhitespaceHelped = false; // only set so far, never read
+      if (ArgLower == "[in]") {
+        Direction = ParamCommandComment::In;
+        RemovingWhitespaceHelped = true;
+      } else if (ArgLower == "[out]") {
+        Direction = ParamCommandComment::Out;
+        RemovingWhitespaceHelped = true;
+      } else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") {
+        Direction = ParamCommandComment::InOut;
+        RemovingWhitespaceHelped = true;
+      } else {
+        Direction = ParamCommandComment::In; // unrecognized: fall back to In
+        RemovingWhitespaceHelped = false;
+      }
+      // Diag() unrecognized parameter passing direction, valid directions are ...
+      // if (RemovingWhitespaceHelped) FixIt
+    }
+    Command->setDirection(Direction, /* Explicit = */ true);
+  } else { // Arg is the parameter name
+    if (Command->getArgCount() == 0) {
+      if (!Command->isDirectionExplicit()) {
+        // User didn't provide a direction argument.
+        Command->setDirection(ParamCommandComment::In, /* Explicit = */ false);
+      }
+      typedef BlockCommandComment::Argument Argument;
+      Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin,
+                                                         ArgLocEnd),
+                                             Arg);
+      Command->setArgs(llvm::makeArrayRef(A, 1));
+      // if (...) Diag() unrecognized parameter name
+    } else {
+      // Diag() \\param command requires at most 2 arguments
+    }
+  }
+  return Command;
+}
+
+ParamCommandComment *Sema::actOnParamCommandFinish(ParamCommandComment *Command,
+                                                   ParagraphComment *Paragraph) {
+  Command->setParagraph(Paragraph); // the paragraph argument of the command
+  return Command; // the same node that was passed in
+}
+
+InlineCommandComment *Sema::actOnInlineCommand(SourceLocation CommandLocBegin,
+                                               SourceLocation CommandLocEnd,
+                                               StringRef CommandName) {
+  ArrayRef<InlineCommandComment::Argument> Args; // empty: command without argument
+  return new (Allocator) InlineCommandComment(CommandLocBegin,
+                                              CommandLocEnd,
+                                              CommandName,
+                                              Args);
+}
+
+InlineCommandComment *Sema::actOnInlineCommand(SourceLocation CommandLocBegin,
+                                               SourceLocation CommandLocEnd,
+                                               StringRef CommandName,
+                                               SourceLocation ArgLocBegin,
+                                               SourceLocation ArgLocEnd,
+                                               StringRef Arg) {
+  typedef InlineCommandComment::Argument Argument;
+  Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin,
+                                                     ArgLocEnd),
+                                         Arg);
+  // The node refers to the single argument allocated above.
+  return new (Allocator) InlineCommandComment(CommandLocBegin,
+                                              CommandLocEnd,
+                                              CommandName,
+                                              llvm::makeArrayRef(A, 1));
+}
+
+InlineContentComment *Sema::actOnUnknownCommand(SourceLocation LocBegin,
+                                                SourceLocation LocEnd,
+                                                StringRef Name) {
+  ArrayRef<InlineCommandComment::Argument> Args; // empty argument list
+  return new (Allocator) InlineCommandComment(LocBegin, LocEnd, Name, Args); // modelled as an inline command
+}
+
+TextComment *Sema::actOnText(SourceLocation LocBegin,
+                             SourceLocation LocEnd,
+                             StringRef Text) {
+  return new (Allocator) TextComment(LocBegin, LocEnd, Text); // plain-text inline node
+}
+
+VerbatimBlockComment *Sema::actOnVerbatimBlockStart(SourceLocation Loc,
+                                                    StringRef Name) {
+  return new (Allocator) VerbatimBlockComment(
+                                  Loc,
+                                  Loc.getLocWithOffset(1 + Name.size()), // 1 presumably covers the command marker character
+                                  Name);
+}
+
+VerbatimBlockLineComment *Sema::actOnVerbatimBlockLine(SourceLocation Loc,
+                                                       StringRef Text) {
+  return new (Allocator) VerbatimBlockLineComment(Loc, Text); // one line of a verbatim block
+}
+
+VerbatimBlockComment *Sema::actOnVerbatimBlockFinish(
+                            VerbatimBlockComment *Block,
+                            SourceLocation CloseNameLocBegin,
+                            StringRef CloseName,
+                            ArrayRef<VerbatimBlockLineComment *> Lines) {
+  Block->setCloseName(CloseName, CloseNameLocBegin); // record the closing command
+  Block->setLines(Lines); // attach the collected lines
+  return Block; // the same node that was passed in
+}
+
+VerbatimLineComment *Sema::actOnVerbatimLine(SourceLocation LocBegin,
+                                             StringRef Name,
+                                             SourceLocation TextBegin,
+                                             StringRef Text) {
+  return new (Allocator) VerbatimLineComment(
+                              LocBegin,
+                              TextBegin.getLocWithOffset(Text.size()), // location right after the text
+                              Name,
+                              TextBegin,
+                              Text);
+}
+
+HTMLOpenTagComment *Sema::actOnHTMLOpenTagStart(SourceLocation LocBegin,
+                                                StringRef TagName) {
+  return new (Allocator) HTMLOpenTagComment(LocBegin, TagName); // attributes are set by actOnHTMLOpenTagFinish
+}
+
+HTMLOpenTagComment *Sema::actOnHTMLOpenTagFinish(
+                              HTMLOpenTagComment *Tag,
+                              ArrayRef<HTMLOpenTagComment::Attribute> Attrs,
+                              SourceLocation GreaterLoc) {
+  Tag->setAttrs(Attrs); // attach the parsed attributes
+  Tag->setGreaterLoc(GreaterLoc); // the parser passes SourceLocation() when '>' is missing
+  return Tag; // the same node that was passed in
+}
+
+HTMLCloseTagComment *Sema::actOnHTMLCloseTag(SourceLocation LocBegin,
+                                             SourceLocation LocEnd,
+                                             StringRef TagName) {
+  return new (Allocator) HTMLCloseTagComment(LocBegin, LocEnd, TagName); // closing-tag inline node
+}
+
+FullComment *Sema::actOnFullComment(
+                              ArrayRef<BlockContentComment *> Blocks) {
+  return new (Allocator) FullComment(Blocks); // root node over all parsed blocks
+}
+
+// TODO: tablegen. Block commands: the names below plus every param command.
+bool Sema::isBlockCommand(StringRef Name) {
+  return llvm::StringSwitch<bool>(Name)
+      .Case("brief", true)
+      .Case("result", true)
+      .Case("return", true)
+      .Case("returns", true)
+      .Case("author", true)
+      .Case("authors", true)
+      .Case("pre", true)
+      .Case("post", true)
+      .Default(false) || isParamCommand(Name);
+}
+
+bool Sema::isParamCommand(StringRef Name) {
+  return llvm::StringSwitch<bool>(Name) // exact match on the command name
+      .Case("param", true)
+      .Case("arg", true)
+      .Default(false);
+}
+
+unsigned Sema::getBlockCommandNumArgs(StringRef Name) {
+  return llvm::StringSwitch<unsigned>(Name) // word arguments before the paragraph
+      .Case("brief", 0)
+      .Case("pre", 0)
+      .Case("post", 0)
+      .Case("author", 0)
+      .Case("authors", 0)
+      .Default(0); // every command, listed or not, currently yields 0
+}
+
+bool Sema::isInlineCommand(StringRef Name) {
+  return llvm::StringSwitch<bool>(Name) // exact match on the command name
+      .Case("c", true)
+      .Case("em", true)
+      .Default(false);
+}
+
+bool Sema::HTMLOpenTagNeedsClosing(StringRef Name) {
+  return llvm::StringSwitch<bool>(Name)
+      .Case("br", false) // <br> is a void element: it has no closing tag
+      .Default(true); // all other tags are expected to be closed
+}
+
+} // end namespace comments
+} // end namespace clang
+