lib/AST/CommentParser.cpp - fp2-dev/platform/external/clang - Gitiles

 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//

 #include "clang/AST/CommentParser.h"
 #include "clang/AST/CommentCommandTraits.h"
 #include "clang/AST/CommentDiagnostic.h"
 #include "clang/AST/CommentSema.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceManager.h"
 #include "llvm/Support/ErrorHandling.h"

 namespace clang {

 /// Returns true if every character of \p S satisfies the single-character
 /// isWhitespace() overload.  An empty string yields true.
 static inline bool isWhitespace(llvm::StringRef S) {
   StringRef::const_iterator Cur = S.begin();
   const StringRef::const_iterator End = S.end();
   while (Cur != End) {
     if (!isWhitespace(*Cur))
       return false;
     ++Cur;
   }
   return true;
 }

 namespace comments {

 /// Re-lexes a sequence of tok::text tokens.
 ///
 /// Text tokens are pulled from the parser on demand (a single tok::newline
 /// sitting between two text tokens is skipped) and scanned character by
 /// character, so that callers can extract whitespace-separated words or
 /// delimited sequences.  Tokens that were fetched but not fully consumed are
 /// handed back to the parser by putBackLeftoverTokens().
 class TextTokenRetokenizer {
   /// Allocator backing the text of tokens produced by lexWord() and
   /// lexDelimitedSeq().
   llvm::BumpPtrAllocator &Allocator;

   /// Parser whose token stream is being re-lexed.
   Parser &P;

   /// This flag is set when there are no more tokens we can fetch from lexer.
   bool NoMoreInterestingTokens;

   /// Token buffer: tokens we have processed and lookahead.
   SmallVector<Token, 16> Toks;

   /// A position in \c Toks.
   struct Position {
     /// Index into \c Toks of the token currently being scanned.
     unsigned CurToken;
     /// Beginning of the current token's text.
     const char *BufferStart;
     /// One past the end of the current token's text.
     const char *BufferEnd;
     /// Next character to read from the current token's text.
     const char *BufferPtr;
     /// Source location corresponding to \c BufferStart.
     SourceLocation BufferStartLoc;
   };

   /// Current position in Toks.
   Position Pos;

   /// Returns true when every buffered token has been consumed.
   bool isEnd() const {
     return Pos.CurToken >= Toks.size();
   }

   /// Sets up the buffer pointers to point to current token.
   void setupBuffer() {
     assert(!isEnd());
     const Token &Tok = Toks[Pos.CurToken];

     Pos.BufferStart = Tok.getText().begin();
     Pos.BufferEnd = Tok.getText().end();
     Pos.BufferPtr = Pos.BufferStart;
     Pos.BufferStartLoc = Tok.getLocation();
   }

   /// Returns the source location of the character at \c Pos.BufferPtr,
   /// computed as an offset from the start of the current token.
   SourceLocation getSourceLocation() const {
     const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
     return Pos.BufferStartLoc.getLocWithOffset(CharNo);
   }

   /// Returns the next character without consuming it.  Must not be called at
   /// end.
   char peek() const {
     assert(!isEnd());
     assert(Pos.BufferPtr != Pos.BufferEnd);
     return *Pos.BufferPtr;
   }

   /// Consumes one character.  When that exhausts the current token, moves to
   /// the next buffered token, fetching one from the parser if none is
   /// buffered; if no token can be fetched the retokenizer is left at end.
   void consumeChar() {
     assert(!isEnd());
     assert(Pos.BufferPtr != Pos.BufferEnd);
     Pos.BufferPtr++;
     if (Pos.BufferPtr == Pos.BufferEnd) {
       Pos.CurToken++;
       if (isEnd() && !addToken())
         return;

       assert(!isEnd());
       setupBuffer();
     }
   }

   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
   bool addToken() {
     if (NoMoreInterestingTokens)
       return false;

     if (P.Tok.is(tok::newline)) {
       // If we see a single newline token between text tokens, skip it.
       Token Newline = P.Tok;
       P.consumeToken();
       if (P.Tok.isNot(tok::text)) {
         // The newline is not followed by text: give it back and stop.
         P.putBack(Newline);
         NoMoreInterestingTokens = true;
         return false;
       }
     }
     if (P.Tok.isNot(tok::text)) {
       NoMoreInterestingTokens = true;
       return false;
     }

     Toks.push_back(P.Tok);
     P.consumeToken();
     // The very first token also initializes the scanning position.
     if (Toks.size() == 1)
       setupBuffer();
     return true;
   }

   /// Consumes characters for as long as the next one is whitespace.
   void consumeWhitespace() {
     while (!isEnd()) {
       if (isWhitespace(peek()))
         consumeChar();
       else
         break;
     }
   }

   /// Fills \p Result as a tok::text token at \p Loc with length \p TokLength
   /// and text \p Text.  \p TokBegin is currently unused.
   void formTokenWithChars(Token &Result,
                           SourceLocation Loc,
                           const char *TokBegin,
                           unsigned TokLength,
                           StringRef Text) {
     Result.setLocation(Loc);
     Result.setKind(tok::text);
     Result.setLength(TokLength);
     // When NDEBUG is not defined, fill the payload fields with recognizable
     // placeholder values before setText() is called.
 #ifndef NDEBUG
     Result.TextPtr = "<UNSET>";
     Result.IntVal = 7;
 #endif
     Result.setText(Text);
   }

 public:
   /// Creates a retokenizer over the token stream of \p P and tries to fetch
   /// the first text token right away.  Token text produced later is allocated
   /// from \p Allocator.
   TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
       Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
     Pos.CurToken = 0;
     addToken();
   }

   /// Extract a word -- sequence of non-whitespace characters.
   ///
   /// Leading whitespace is skipped.  On success \p Tok receives the word and
   /// true is returned; if no word is available the position is restored and
   /// false is returned.
   bool lexWord(Token &Tok) {
     if (isEnd())
       return false;

     Position SavedPos = Pos;

     consumeWhitespace();
     SmallString<32> WordText;
     const char *WordBegin = Pos.BufferPtr;
     SourceLocation Loc = getSourceLocation();
     while (!isEnd()) {
       const char C = peek();
       if (!isWhitespace(C)) {
         WordText.push_back(C);
         consumeChar();
       } else
         break;
     }
     const unsigned Length = WordText.size();
     if (Length == 0) {
       Pos = SavedPos;
       return false;
     }

     // Copy the word, including the terminating NUL, into allocator-owned
     // storage so that the token text outlives WordText.
     char *TextPtr = Allocator.Allocate<char>(Length + 1);

     memcpy(TextPtr, WordText.c_str(), Length + 1);
     StringRef Text = StringRef(TextPtr, Length);

     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
     return true;
   }

   /// Extract a sequence that starts with \p OpenDelim and runs up to and
   /// including the first \p CloseDelim.
   ///
   /// Leading whitespace is skipped.  On success \p Tok receives the sequence
   /// (delimiters included) and true is returned.  If the next character is
   /// not \p OpenDelim, or no \p CloseDelim is found, the position is restored
   /// and false is returned.
   bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
     if (isEnd())
       return false;

     Position SavedPos = Pos;

     consumeWhitespace();
     SmallString<32> WordText;
     const char *WordBegin = Pos.BufferPtr;
     SourceLocation Loc = getSourceLocation();
     bool Error = false;
     if (!isEnd()) {
       const char C = peek();
       if (C == OpenDelim) {
         WordText.push_back(C);
         consumeChar();
       } else
         Error = true;
     }
     char C = '\0';
     while (!Error && !isEnd()) {
       C = peek();
       WordText.push_back(C);
       consumeChar();
       if (C == CloseDelim)
         break;
     }
     // Ran out of text before seeing the closing delimiter.
     if (!Error && C != CloseDelim)
       Error = true;

     if (Error) {
       Pos = SavedPos;
       return false;
     }

     const unsigned Length = WordText.size();
     char *TextPtr = Allocator.Allocate<char>(Length + 1);

     memcpy(TextPtr, WordText.c_str(), Length + 1);
     StringRef Text = StringRef(TextPtr, Length);

     // Use the number of collected characters as the token length, exactly as
     // lexWord() does.  Pos.BufferPtr must not be used for this: when the
     // closing delimiter is the last character of a token, consumeChar() has
     // already re-pointed the buffer at the following token, so subtracting
     // WordBegin from it would mix pointers into two different token texts.
     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
     return true;
   }

   /// Put back tokens that we didn't consume.
   ///
   /// If the current token was partially consumed, its unread tail is
   /// re-formed as a separate text token.  The untouched tokens following the
   /// current one are passed to Parser::putBack() first, then the partial
   /// token (if any).
   void putBackLeftoverTokens() {
     if (isEnd())
       return;

     bool HavePartialTok = false;
     Token PartialTok;
     if (Pos.BufferPtr != Pos.BufferStart) {
       formTokenWithChars(PartialTok, getSourceLocation(),
                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
                          StringRef(Pos.BufferPtr,
                                    Pos.BufferEnd - Pos.BufferPtr));
       HavePartialTok = true;
       // The current token is represented by PartialTok from here on.
       Pos.CurToken++;
     }

     P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
     Pos.CurToken = Toks.size();

     if (HavePartialTok)
       P.putBack(PartialTok);
   }
 };

 /// Constructs a parser that reads from lexer \p L and hands parsed constructs
 /// to \p S, then calls consumeToken() once.
 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
                const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
                const CommandTraits &Traits)
     : L(L),
       S(S),
       Allocator(Allocator),
       SourceMgr(SourceMgr),
       Diags(Diags),
       Traits(Traits) {
   // NOTE(review): presumably this loads the first token into Tok -- confirm
   // against the definition of consumeToken().
   consumeToken();
 }

 /// Parses the arguments of a parameter command: an optional bracketed
 /// direction specification followed by an optional parameter name.  Each
 /// argument that is found is reported to Sema for \p PC.
 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
                                    TextTokenRetokenizer &Retokenizer) {
   // A direction specification looks like [dir], e.g., [in], [out], [in,out].
   Token DirectionTok;
   const bool HaveDirection =
       Retokenizer.lexDelimitedSeq(DirectionTok, '[', ']');
   if (HaveDirection) {
     S.actOnParamCommandDirectionArg(PC,
                                     DirectionTok.getLocation(),
                                     DirectionTok.getEndLocation(),
                                     DirectionTok.getText());
   }

   // The parameter name is the next word, if there is one.
   Token NameTok;
   if (!Retokenizer.lexWord(NameTok))
     return;

   S.actOnParamCommandParamNameArg(PC,
                                   NameTok.getLocation(),
                                   NameTok.getEndLocation(),
                                   NameTok.getText());
 }

 /// Parses the single argument of a template-parameter command: the next
 /// word, if any, is reported to Sema as the parameter name of \p TPC.
 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
                                     TextTokenRetokenizer &Retokenizer) {
   Token NameTok;
   if (!Retokenizer.lexWord(NameTok))
     return;

   S.actOnTParamCommandParamNameArg(TPC,
                                    NameTok.getLocation(),
                                    NameTok.getEndLocation(),
                                    NameTok.getText());
 }

 /// Parses up to \p NumArgs word arguments for block command \p BC and passes
 /// the ones that were actually found to Sema.
 void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
                                    TextTokenRetokenizer &Retokenizer,
                                    unsigned NumArgs) {
   typedef BlockCommandComment::Argument Argument;

   // Storage for all NumArgs arguments is reserved up front; only the first
   // Count entries are handed to Sema.
   Argument *Storage =
       new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];

   unsigned Count = 0;
   for (Token WordTok; Count < NumArgs && Retokenizer.lexWord(WordTok);
        ++Count) {
     const SourceRange WordRange(WordTok.getLocation(),
                                 WordTok.getEndLocation());
     Storage[Count] = Argument(WordRange, WordTok.getText());
   }

   S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Storage, Count));
 }

 /// Parses a block command: a parameter command, a template-parameter command
 /// or an ordinary block command, chosen from the CommandInfo of the current
 /// token.  The command's arguments (if it takes any) and the paragraph that
 /// follows it are parsed and attached; the finished command is returned.
 BlockCommandComment *Parser::parseBlockCommand() {
   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
   const CommandMarkerKind CommandMarker =
       Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
   const SourceLocation CommandBegin = Tok.getLocation();
   const SourceLocation CommandEnd = Tok.getEndLocation();

   // Exactly one of these three is set, depending on the command flavor.
   ParamCommandComment *PC = 0;
   TParamCommandComment *TPC = 0;
   BlockCommandComment *BC = 0;
   if (Info->IsParamCommand)
     PC = S.actOnParamCommandStart(CommandBegin, CommandEnd,
                                   Tok.getCommandID(), CommandMarker);
   else if (Info->IsTParamCommand)
     TPC = S.actOnTParamCommandStart(CommandBegin, CommandEnd,
                                     Tok.getCommandID(), CommandMarker);
   else
     BC = S.actOnBlockCommandStart(CommandBegin, CommandEnd,
                                   Tok.getCommandID(), CommandMarker);
   consumeToken();

   ParagraphComment *Paragraph = 0;
   if (isTokBlockCommand()) {
     // Block command ahead.  We can't nest block commands, so pretend that this
     // command has an empty argument.
     Paragraph = S.actOnParagraphComment(None);
   } else {
     if (PC || TPC || Info->NumArgs > 0) {
       // In order to parse command arguments we need to retokenize a few
       // following text tokens.
       TextTokenRetokenizer Retokenizer(Allocator, *this);

       if (PC)
         parseParamCommandArgs(PC, Retokenizer);
       else if (TPC)
         parseTParamCommandArgs(TPC, Retokenizer);
       else
         parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);

       Retokenizer.putBackLeftoverTokens();
     }

     // If there's a block command ahead -- directly, or right after a single
     // newline -- we will attach an empty paragraph to this command.
     bool EmptyParagraph = isTokBlockCommand();
     if (!EmptyParagraph && Tok.is(tok::newline)) {
       // Peek past the newline, then restore it.
       Token PrevTok = Tok;
       consumeToken();
       EmptyParagraph = isTokBlockCommand();
       putBack(PrevTok);
     }

     if (EmptyParagraph) {
       Paragraph = S.actOnParagraphComment(None);
     } else {
       // Since we have checked for a block command, we should have parsed a
       // paragraph.
       Paragraph = cast<ParagraphComment>(parseParagraphOrBlockCommand());
     }
   }

   if (PC) {
     S.actOnParamCommandFinish(PC, Paragraph);
     return PC;
   }
   if (TPC) {
     S.actOnTParamCommandFinish(TPC, Paragraph);
     return TPC;
   }
   S.actOnBlockCommandFinish(BC, Paragraph);
   return BC;
 }

 /// Parses an inline command together with at most one word argument taken
 /// from the text that follows it, and returns the node created by Sema.
 InlineCommandComment *Parser::parseInlineCommand() {
   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

   const Token CommandTok = Tok;
   consumeToken();

   // Re-lex the following text to look for a word argument.
   TextTokenRetokenizer Retokenizer(Allocator, *this);
   Token ArgTok;

   InlineCommandComment *IC = 0;
   if (!Retokenizer.lexWord(ArgTok)) {
     // No argument available.
     IC = S.actOnInlineCommand(CommandTok.getLocation(),
                               CommandTok.getEndLocation(),
                               CommandTok.getCommandID());
   } else {
     IC = S.actOnInlineCommand(CommandTok.getLocation(),
                               CommandTok.getEndLocation(),
                               CommandTok.getCommandID(),
                               ArgTok.getLocation(),
                               ArgTok.getEndLocation(),
                               ArgTok.getText());
   }

   // Whatever text was fetched but not used goes back to the parser.
   Retokenizer.putBackLeftoverTokens();

   return IC;
 }

 /// Parses an HTML start tag: the tag name token followed by any number of
 /// attributes (an identifier, optionally followed by '=' and a quoted
 /// string) and a closing '>' or '/>'.
 ///
 /// Malformed input is diagnosed with a warning; in every case the tag is
 /// finished through Sema with the attributes collected so far and returned.
 HTMLStartTagComment *Parser::parseHTMLStartTag() {
   assert(Tok.is(tok::html_start_tag));
   HTMLStartTagComment *HST =
       S.actOnHTMLStartTagStart(Tok.getLocation(),
                                Tok.getHTMLTagStartName());
   consumeToken();

   // Attributes collected so far; copied into Sema-owned storage on finish.
   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
   while (true) {
     switch (Tok.getKind()) {
     case tok::html_ident: {
       Token Ident = Tok;
       consumeToken();
       if (Tok.isNot(tok::html_equals)) {
         // Attribute without a value.
         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                        Ident.getHTMLIdent()));
         continue;
       }
       Token Equals = Tok;
       consumeToken();
       if (Tok.isNot(tok::html_quoted_string)) {
         // '=' without a quoted string: warn, keep the attribute without a
         // value and skip any stray '=' or quoted-string tokens.
         Diag(Tok.getLocation(),
              diag::warn_doc_html_start_tag_expected_quoted_string)
           << SourceRange(Equals.getLocation());
         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                        Ident.getHTMLIdent()));
         while (Tok.is(tok::html_equals) ||
                Tok.is(tok::html_quoted_string))
           consumeToken();
         continue;
       }
       // Complete attribute: ident = "value".
       Attrs.push_back(HTMLStartTagComment::Attribute(
                               Ident.getLocation(),
                               Ident.getHTMLIdent(),
                               Equals.getLocation(),
                               SourceRange(Tok.getLocation(),
                                           Tok.getEndLocation()),
                               Tok.getHTMLQuotedString()));
       consumeToken();
       continue;
     }

     case tok::html_greater:
       // '>' ends a regular start tag.
       S.actOnHTMLStartTagFinish(HST,
                                 S.copyArray(llvm::makeArrayRef(Attrs)),
                                 Tok.getLocation(),
                                 /* IsSelfClosing = */ false);
       consumeToken();
       return HST;

     case tok::html_slash_greater:
       // '/>' ends a self-closing tag.
       S.actOnHTMLStartTagFinish(HST,
                                 S.copyArray(llvm::makeArrayRef(Attrs)),
                                 Tok.getLocation(),
                                 /* IsSelfClosing = */ true);
       consumeToken();
       return HST;

     case tok::html_equals:
     case tok::html_quoted_string:
       // '=' or a quoted string where an identifier or tag end was expected:
       // warn and skip the run of such tokens.
       Diag(Tok.getLocation(),
            diag::warn_doc_html_start_tag_expected_ident_or_greater);
       while (Tok.is(tok::html_equals) ||
              Tok.is(tok::html_quoted_string))
         consumeToken();
       // Resume parsing if the tag continues with something we understand.
       if (Tok.is(tok::html_ident) ||
           Tok.is(tok::html_greater) ||
           Tok.is(tok::html_slash_greater))
         continue;

       // Otherwise finish the tag with a default-constructed location for
       // the closing '>'.
       S.actOnHTMLStartTagFinish(HST,
                                 S.copyArray(llvm::makeArrayRef(Attrs)),
                                 SourceLocation(),
                                 /* IsSelfClosing = */ false);
       return HST;

     default:
       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
       S.actOnHTMLStartTagFinish(HST,
                                 S.copyArray(llvm::makeArrayRef(Attrs)),
                                 SourceLocation(),
                                 /* IsSelfClosing = */ false);
       // Compare the presumed line numbers of the tag start and of the
       // current token to choose how the warning is presented.
       bool StartLineInvalid;
       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
                                                   HST->getLocation(),
                                                   &StartLineInvalid);
       bool EndLineInvalid;
       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
                                                   Tok.getLocation(),
                                                   &EndLineInvalid);
       // Same line (or a line number could not be computed): attach the tag's
       // source range to the warning itself.
       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
         Diag(Tok.getLocation(),
              diag::warn_doc_html_start_tag_expected_ident_or_greater)
           << HST->getSourceRange();
       else {
         // Different lines: emit the warning, then a separate note at the
         // location where the tag started.
         Diag(Tok.getLocation(),
              diag::warn_doc_html_start_tag_expected_ident_or_greater);
         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
           << HST->getSourceRange();
       }
       return HST;
     }
   }
 }

 /// Parses an HTML end tag.  The closing '>' is consumed if present; if it is
 /// missing, a default-constructed location is passed to Sema in its place.
 HTMLEndTagComment *Parser::parseHTMLEndTag() {
   assert(Tok.is(tok::html_end_tag));

   const Token EndTagTok = Tok;
   consumeToken();

   SourceLocation GreaterLoc;
   if (Tok.is(tok::html_greater)) {
     GreaterLoc = Tok.getLocation();
     consumeToken();
   }

   HTMLEndTagComment *HET = S.actOnHTMLEndTag(EndTagTok.getLocation(),
                                              GreaterLoc,
                                              EndTagTok.getHTMLTagEndName());
   return HET;
 }

 /// Parses either a paragraph or, if the current token starts a block command
 /// before any inline content has been collected, that block command.
 ///
 /// Inline content (text, inline commands, unknown commands, HTML tags) is
 /// accumulated until the paragraph ends: at an empty line, at end of input,
 /// or when block content follows.  The accumulated content is then turned
 /// into a paragraph by Sema.
 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
   // Inline content collected for the paragraph so far.
   SmallVector<InlineContentComment *, 8> Content;

   while (true) {
     switch (Tok.getKind()) {
     case tok::verbatim_block_begin:
     case tok::verbatim_line_name:
     case tok::eof:
       assert(Content.size() != 0);
       break; // Block content or EOF ahead, finish this paragraph.

     case tok::unknown_command:
       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                               Tok.getEndLocation(),
                                               Tok.getUnknownCommandName()));
       consumeToken();
       continue;

     case tok::backslash_command:
     case tok::at_command: {
       const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
       if (Info->IsBlockCommand) {
         // A block command at the very start is parsed as such; otherwise it
         // terminates the paragraph being built.
         if (Content.size() == 0)
           return parseBlockCommand();
         break; // Block command ahead, finish this paragraph.
       }
       if (Info->IsVerbatimBlockEndCommand) {
         // A verbatim block end command with no matching start: warn and
         // drop the token.
         Diag(Tok.getLocation(),
              diag::warn_verbatim_block_end_without_start)
           << Tok.is(tok::at_command)
           << Info->Name
           << SourceRange(Tok.getLocation(), Tok.getEndLocation());
         consumeToken();
         continue;
       }
       if (Info->IsUnknownCommand) {
         Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                                 Tok.getEndLocation(),
                                                 Info->getID()));
         consumeToken();
         continue;
       }
       assert(Info->IsInlineCommand);
       Content.push_back(parseInlineCommand());
       continue;
     }

     case tok::newline: {
       consumeToken();
       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
         consumeToken();
         break; // Two newlines -- end of paragraph.
       }
       // Also allow [tok::newline, tok::text, tok::newline] if the middle
       // tok::text is just whitespace.
       if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
         Token WhitespaceTok = Tok;
         consumeToken();
         if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
           consumeToken();
           break;
         }
         // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
         putBack(WhitespaceTok);
       }
       // A single newline inside the paragraph is recorded on the preceding
       // piece of content, if there is one.
       if (Content.size() > 0)
         Content.back()->addTrailingNewline();
       continue;
     }

     // Don't deal with HTML tag soup now.
     case tok::html_start_tag:
       Content.push_back(parseHTMLStartTag());
       continue;

     case tok::html_end_tag:
       Content.push_back(parseHTMLEndTag());
       continue;

     case tok::text:
       Content.push_back(S.actOnText(Tok.getLocation(),
                                     Tok.getEndLocation(),
                                     Tok.getText()));
       consumeToken();
       continue;

     // The remaining token kinds are not expected at this point.
     case tok::verbatim_block_line:
     case tok::verbatim_block_end:
     case tok::verbatim_line_text:
     case tok::html_ident:
     case tok::html_equals:
     case tok::html_quoted_string:
     case tok::html_greater:
     case tok::html_slash_greater:
       llvm_unreachable("should not see this token");
     }
     // Reached only via a 'break' out of the switch: the paragraph is done.
     break;
   }

   return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
 }

 /// Parses a verbatim block: the opening command, the lines of the block and
 /// the closing command.  A block whose closing command is missing is still
 /// finished, with a default-constructed end location and an empty name.
 VerbatimBlockComment *Parser::parseVerbatimBlock() {
   assert(Tok.is(tok::verbatim_block_begin));

   VerbatimBlockComment *VB =
       S.actOnVerbatimBlockStart(Tok.getLocation(),
                                 Tok.getVerbatimBlockID());
   consumeToken();

   // Don't create an empty line if verbatim opening command is followed
   // by a newline.
   if (Tok.is(tok::newline))
     consumeToken();

   SmallVector<VerbatimBlockLineComment *, 8> Lines;
   for (;;) {
     const bool IsTextLine = Tok.is(tok::verbatim_block_line);
     if (!IsTextLine && Tok.isNot(tok::newline))
       break;

     if (IsTextLine) {
       Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(),
                                                Tok.getVerbatimBlockText()));
       consumeToken();
       // The newline that terminates this line is not a line of its own.
       if (Tok.is(tok::newline))
         consumeToken();
     } else {
       // Empty line, just a tok::newline.
       Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(), ""));
       consumeToken();
     }
   }

   if (Tok.isNot(tok::verbatim_block_end)) {
     // Unterminated \\verbatim block
     S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                                S.copyArray(llvm::makeArrayRef(Lines)));
     return VB;
   }

   const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
   S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
                              Info->Name,
                              S.copyArray(llvm::makeArrayRef(Lines)));
   consumeToken();
   return VB;
 }

 /// Parses a verbatim line command and the text that follows it on the same
 /// line.  When there is no text token, empty text located at the end of the
 /// command name is used instead.
 VerbatimLineComment *Parser::parseVerbatimLine() {
   assert(Tok.is(tok::verbatim_line_name));

   const Token NameTok = Tok;
   consumeToken();

   // Next token might not be a tok::verbatim_line_text if verbatim line
   // starting command comes just before a newline or comment end.  Start from
   // the fallback values and override them when the text token is present.
   SourceLocation TextBegin = NameTok.getEndLocation();
   StringRef Text = "";
   if (Tok.is(tok::verbatim_line_text)) {
     TextBegin = Tok.getLocation();
     Text = Tok.getVerbatimLineText();
   }

   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
                                                 NameTok.getVerbatimLineID(),
                                                 TextBegin,
                                                 Text);
   // The token after the command name is consumed whether or not it was the
   // text token.
   consumeToken();
   return VL;
 }

 /// Dispatches on the current token kind to parse one piece of block content:
 /// a verbatim line, a verbatim block, or a paragraph / block command.
 BlockContentComment *Parser::parseBlockContent() {
   switch (Tok.getKind()) {
   case tok::verbatim_line_name:
     return parseVerbatimLine();

   case tok::verbatim_block_begin:
     return parseVerbatimBlock();

   // Everything that can open a paragraph or a block command.
   case tok::html_end_tag:
   case tok::html_start_tag:
   case tok::at_command:
   case tok::backslash_command:
   case tok::unknown_command:
   case tok::text:
     return parseParagraphOrBlockCommand();

   // Token kinds that are not expected at the start of block content.
   case tok::html_slash_greater:
   case tok::html_greater:
   case tok::html_quoted_string:
   case tok::html_equals:
   case tok::html_ident:
   case tok::verbatim_line_text:
   case tok::verbatim_block_end:
   case tok::verbatim_block_line:
   case tok::newline:
   case tok::eof:
     llvm_unreachable("should not see this token");
   }
   llvm_unreachable("bogus token kind");
 }

 /// Parses the whole comment: block content is parsed repeatedly until end of
 /// input, with newline tokens between blocks skipped, and the blocks are
 /// handed to Sema to build the full comment.
 FullComment *Parser::parseFullComment() {
   SmallVector<BlockContentComment *, 8> Blocks;
   for (;;) {
     // Skip newlines at the beginning of the comment and the extra ones that
     // follow each block.
     while (Tok.is(tok::newline))
       consumeToken();

     if (Tok.is(tok::eof))
       break;

     Blocks.push_back(parseBlockContent());
   }
   return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
 }

 } // end namespace comments
 } // end namespace clang
	//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//

	#include "clang/AST/CommentParser.h"
	#include "clang/AST/CommentCommandTraits.h"
	#include "clang/AST/CommentDiagnostic.h"
	#include "clang/AST/CommentSema.h"
	#include "clang/Basic/CharInfo.h"
	#include "clang/Basic/SourceManager.h"
	#include "llvm/Support/ErrorHandling.h"

	namespace clang {

	/// Returns true if every character of \p S satisfies the single-character
	/// isWhitespace() overload.  An empty string yields true.
	static inline bool isWhitespace(llvm::StringRef S) {
	  StringRef::const_iterator Cur = S.begin();
	  const StringRef::const_iterator End = S.end();
	  while (Cur != End) {
	    if (!isWhitespace(*Cur))
	      return false;
	    ++Cur;
	  }
	  return true;
	}

	namespace comments {

	/// Re-lexes a sequence of tok::text tokens.
	///
	/// Text tokens are pulled from the parser on demand (a single tok::newline
	/// sitting between two text tokens is skipped) and scanned character by
	/// character, so that callers can extract whitespace-separated words or
	/// delimited sequences.  Tokens that were fetched but not fully consumed are
	/// handed back to the parser by putBackLeftoverTokens().
	class TextTokenRetokenizer {
	  /// Allocator backing the text of tokens produced by lexWord() and
	  /// lexDelimitedSeq().
	  llvm::BumpPtrAllocator &Allocator;

	  /// Parser whose token stream is being re-lexed.
	  Parser &P;

	  /// This flag is set when there are no more tokens we can fetch from lexer.
	  bool NoMoreInterestingTokens;

	  /// Token buffer: tokens we have processed and lookahead.
	  SmallVector<Token, 16> Toks;

	  /// A position in \c Toks.
	  struct Position {
	    /// Index into \c Toks of the token currently being scanned.
	    unsigned CurToken;
	    /// Beginning of the current token's text.
	    const char *BufferStart;
	    /// One past the end of the current token's text.
	    const char *BufferEnd;
	    /// Next character to read from the current token's text.
	    const char *BufferPtr;
	    /// Source location corresponding to \c BufferStart.
	    SourceLocation BufferStartLoc;
	  };

	  /// Current position in Toks.
	  Position Pos;

	  /// Returns true when every buffered token has been consumed.
	  bool isEnd() const {
	    return Pos.CurToken >= Toks.size();
	  }

	  /// Sets up the buffer pointers to point to current token.
	  void setupBuffer() {
	    assert(!isEnd());
	    const Token &Tok = Toks[Pos.CurToken];

	    Pos.BufferStart = Tok.getText().begin();
	    Pos.BufferEnd = Tok.getText().end();
	    Pos.BufferPtr = Pos.BufferStart;
	    Pos.BufferStartLoc = Tok.getLocation();
	  }

	  /// Returns the source location of the character at \c Pos.BufferPtr,
	  /// computed as an offset from the start of the current token.
	  SourceLocation getSourceLocation() const {
	    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
	    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
	  }

	  /// Returns the next character without consuming it.  Must not be called at
	  /// end.
	  char peek() const {
	    assert(!isEnd());
	    assert(Pos.BufferPtr != Pos.BufferEnd);
	    return *Pos.BufferPtr;
	  }

	  /// Consumes one character.  When that exhausts the current token, moves to
	  /// the next buffered token, fetching one from the parser if none is
	  /// buffered; if no token can be fetched the retokenizer is left at end.
	  void consumeChar() {
	    assert(!isEnd());
	    assert(Pos.BufferPtr != Pos.BufferEnd);
	    Pos.BufferPtr++;
	    if (Pos.BufferPtr == Pos.BufferEnd) {
	      Pos.CurToken++;
	      if (isEnd() && !addToken())
	        return;

	      assert(!isEnd());
	      setupBuffer();
	    }
	  }

	  /// Add a token.
	  /// Returns true on success, false if there are no interesting tokens to
	  /// fetch from lexer.
	  bool addToken() {
	    if (NoMoreInterestingTokens)
	      return false;

	    if (P.Tok.is(tok::newline)) {
	      // If we see a single newline token between text tokens, skip it.
	      Token Newline = P.Tok;
	      P.consumeToken();
	      if (P.Tok.isNot(tok::text)) {
	        // The newline is not followed by text: give it back and stop.
	        P.putBack(Newline);
	        NoMoreInterestingTokens = true;
	        return false;
	      }
	    }
	    if (P.Tok.isNot(tok::text)) {
	      NoMoreInterestingTokens = true;
	      return false;
	    }

	    Toks.push_back(P.Tok);
	    P.consumeToken();
	    // The very first token also initializes the scanning position.
	    if (Toks.size() == 1)
	      setupBuffer();
	    return true;
	  }

	  /// Consumes characters for as long as the next one is whitespace.
	  void consumeWhitespace() {
	    while (!isEnd()) {
	      if (isWhitespace(peek()))
	        consumeChar();
	      else
	        break;
	    }
	  }

	  /// Fills \p Result as a tok::text token at \p Loc with length \p TokLength
	  /// and text \p Text.  \p TokBegin is currently unused.
	  void formTokenWithChars(Token &Result,
	                          SourceLocation Loc,
	                          const char *TokBegin,
	                          unsigned TokLength,
	                          StringRef Text) {
	    Result.setLocation(Loc);
	    Result.setKind(tok::text);
	    Result.setLength(TokLength);
	    // When NDEBUG is not defined, fill the payload fields with recognizable
	    // placeholder values before setText() is called.
	#ifndef NDEBUG
	    Result.TextPtr = "<UNSET>";
	    Result.IntVal = 7;
	#endif
	    Result.setText(Text);
	  }

	public:
	  /// Creates a retokenizer over the token stream of \p P and tries to fetch
	  /// the first text token right away.  Token text produced later is allocated
	  /// from \p Allocator.
	  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
	      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
	    Pos.CurToken = 0;
	    addToken();
	  }

	  /// Extract a word -- sequence of non-whitespace characters.
	  ///
	  /// Leading whitespace is skipped.  On success \p Tok receives the word and
	  /// true is returned; if no word is available the position is restored and
	  /// false is returned.
	  bool lexWord(Token &Tok) {
	    if (isEnd())
	      return false;

	    Position SavedPos = Pos;

	    consumeWhitespace();
	    SmallString<32> WordText;
	    const char *WordBegin = Pos.BufferPtr;
	    SourceLocation Loc = getSourceLocation();
	    while (!isEnd()) {
	      const char C = peek();
	      if (!isWhitespace(C)) {
	        WordText.push_back(C);
	        consumeChar();
	      } else
	        break;
	    }
	    const unsigned Length = WordText.size();
	    if (Length == 0) {
	      Pos = SavedPos;
	      return false;
	    }

	    // Copy the word, including the terminating NUL, into allocator-owned
	    // storage so that the token text outlives WordText.
	    char *TextPtr = Allocator.Allocate<char>(Length + 1);

	    memcpy(TextPtr, WordText.c_str(), Length + 1);
	    StringRef Text = StringRef(TextPtr, Length);

	    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
	    return true;
	  }

	  /// Extract a sequence that starts with \p OpenDelim and runs up to and
	  /// including the first \p CloseDelim.
	  ///
	  /// Leading whitespace is skipped.  On success \p Tok receives the sequence
	  /// (delimiters included) and true is returned.  If the next character is
	  /// not \p OpenDelim, or no \p CloseDelim is found, the position is restored
	  /// and false is returned.
	  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
	    if (isEnd())
	      return false;

	    Position SavedPos = Pos;

	    consumeWhitespace();
	    SmallString<32> WordText;
	    const char *WordBegin = Pos.BufferPtr;
	    SourceLocation Loc = getSourceLocation();
	    bool Error = false;
	    if (!isEnd()) {
	      const char C = peek();
	      if (C == OpenDelim) {
	        WordText.push_back(C);
	        consumeChar();
	      } else
	        Error = true;
	    }
	    char C = '\0';
	    while (!Error && !isEnd()) {
	      C = peek();
	      WordText.push_back(C);
	      consumeChar();
	      if (C == CloseDelim)
	        break;
	    }
	    // Ran out of text before seeing the closing delimiter.
	    if (!Error && C != CloseDelim)
	      Error = true;

	    if (Error) {
	      Pos = SavedPos;
	      return false;
	    }

	    const unsigned Length = WordText.size();
	    char *TextPtr = Allocator.Allocate<char>(Length + 1);

	    memcpy(TextPtr, WordText.c_str(), Length + 1);
	    StringRef Text = StringRef(TextPtr, Length);

	    // Use the number of collected characters as the token length, exactly as
	    // lexWord() does.  Pos.BufferPtr must not be used for this: when the
	    // closing delimiter is the last character of a token, consumeChar() has
	    // already re-pointed the buffer at the following token, so subtracting
	    // WordBegin from it would mix pointers into two different token texts.
	    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
	    return true;
	  }

	  /// Put back tokens that we didn't consume.
	  ///
	  /// If the current token was partially consumed, its unread tail is
	  /// re-formed as a separate text token.  The untouched tokens following the
	  /// current one are passed to Parser::putBack() first, then the partial
	  /// token (if any).
	  void putBackLeftoverTokens() {
	    if (isEnd())
	      return;

	    bool HavePartialTok = false;
	    Token PartialTok;
	    if (Pos.BufferPtr != Pos.BufferStart) {
	      formTokenWithChars(PartialTok, getSourceLocation(),
	                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
	                         StringRef(Pos.BufferPtr,
	                                   Pos.BufferEnd - Pos.BufferPtr));
	      HavePartialTok = true;
	      // The current token is represented by PartialTok from here on.
	      Pos.CurToken++;
	    }

	    P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
	    Pos.CurToken = Toks.size();

	    if (HavePartialTok)
	      P.putBack(PartialTok);
	  }
	};

	/// Constructs a parser that reads from lexer \p L and hands parsed constructs
	/// to \p S, then calls consumeToken() once.
	Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
	               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
	               const CommandTraits &Traits)
	    : L(L),
	      S(S),
	      Allocator(Allocator),
	      SourceMgr(SourceMgr),
	      Diags(Diags),
	      Traits(Traits) {
	  // NOTE(review): presumably this loads the first token into Tok -- confirm
	  // against the definition of consumeToken().
	  consumeToken();
	}

	/// Parses the arguments of a parameter command: an optional bracketed
	/// direction specification followed by an optional parameter name.  Each
	/// argument that is found is reported to Sema for \p PC.
	void Parser::parseParamCommandArgs(ParamCommandComment *PC,
	                                   TextTokenRetokenizer &Retokenizer) {
	  // A direction specification looks like [dir], e.g., [in], [out], [in,out].
	  Token DirectionTok;
	  const bool HaveDirection =
	      Retokenizer.lexDelimitedSeq(DirectionTok, '[', ']');
	  if (HaveDirection) {
	    S.actOnParamCommandDirectionArg(PC,
	                                    DirectionTok.getLocation(),
	                                    DirectionTok.getEndLocation(),
	                                    DirectionTok.getText());
	  }

	  // The parameter name is the next word, if there is one.
	  Token NameTok;
	  if (!Retokenizer.lexWord(NameTok))
	    return;

	  S.actOnParamCommandParamNameArg(PC,
	                                  NameTok.getLocation(),
	                                  NameTok.getEndLocation(),
	                                  NameTok.getText());
	}

	/// Parses the single argument of a template-parameter command: the next
	/// word, if any, is reported to Sema as the parameter name of \p TPC.
	void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
	                                    TextTokenRetokenizer &Retokenizer) {
	  Token NameTok;
	  if (!Retokenizer.lexWord(NameTok))
	    return;

	  S.actOnTParamCommandParamNameArg(TPC,
	                                   NameTok.getLocation(),
	                                   NameTok.getEndLocation(),
	                                   NameTok.getText());
	}

	/// Parses up to \p NumArgs word arguments for block command \p BC and passes
	/// the ones that were actually found to Sema.
	void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
	                                   TextTokenRetokenizer &Retokenizer,
	                                   unsigned NumArgs) {
	  typedef BlockCommandComment::Argument Argument;

	  // Storage for all NumArgs arguments is reserved up front; only the first
	  // Count entries are handed to Sema.
	  Argument *Storage =
	      new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];

	  unsigned Count = 0;
	  for (Token WordTok; Count < NumArgs && Retokenizer.lexWord(WordTok);
	       ++Count) {
	    const SourceRange WordRange(WordTok.getLocation(),
	                                WordTok.getEndLocation());
	    Storage[Count] = Argument(WordRange, WordTok.getText());
	  }

	  S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Storage, Count));
	}

	/// Parses a block command together with its arguments and its paragraph.
	///
	/// Expects Tok to be a backslash_command or an at_command token.  Depending
	/// on the command's CommandInfo, a ParamCommandComment, a
	/// TParamCommandComment or a plain BlockCommandComment is started.  Word
	/// arguments, if the command takes any, are re-lexed from the text tokens
	/// that follow, and the paragraph after them is attached to the command.
	///
	/// Block commands can't nest: when another block command follows (directly,
	/// after the arguments, or after a single newline), an empty paragraph is
	/// attached instead.
	///
	/// \returns the finished command node.
	BlockCommandComment *Parser::parseBlockCommand() {
	  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

	  // Exactly one of these is set, according to the kind of the command.
	  ParamCommandComment *PC = 0;
	  TParamCommandComment *TPC = 0;
	  BlockCommandComment *BC = 0;
	  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
	  CommandMarkerKind CommandMarker =
	      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
	  if (Info->IsParamCommand) {
	    PC = S.actOnParamCommandStart(Tok.getLocation(),
	                                  Tok.getEndLocation(),
	                                  Tok.getCommandID(),
	                                  CommandMarker);
	  } else if (Info->IsTParamCommand) {
	    TPC = S.actOnTParamCommandStart(Tok.getLocation(),
	                                    Tok.getEndLocation(),
	                                    Tok.getCommandID(),
	                                    CommandMarker);
	  } else {
	    BC = S.actOnBlockCommandStart(Tok.getLocation(),
	                                  Tok.getEndLocation(),
	                                  Tok.getCommandID(),
	                                  CommandMarker);
	  }
	  consumeToken();

	  // Block command ahead.  We can't nest block commands, so pretend that this
	  // command has an empty argument: neither arguments nor a paragraph are
	  // parsed in that case.
	  bool EmptyParagraph = isTokBlockCommand();

	  if (!EmptyParagraph) {
	    if (PC || TPC || Info->NumArgs > 0) {
	      // In order to parse command arguments we need to retokenize a few
	      // following text tokens.
	      TextTokenRetokenizer Retokenizer(Allocator, *this);

	      if (PC)
	        parseParamCommandArgs(PC, Retokenizer);
	      else if (TPC)
	        parseTParamCommandArgs(TPC, Retokenizer);
	      else
	        parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);

	      Retokenizer.putBackLeftoverTokens();
	    }

	    // If there's a block command ahead, we will attach an empty paragraph to
	    // this command.
	    if (isTokBlockCommand())
	      EmptyParagraph = true;
	    else if (Tok.is(tok::newline)) {
	      // Peek past a single newline, then restore it.
	      Token PrevTok = Tok;
	      consumeToken();
	      EmptyParagraph = isTokBlockCommand();
	      putBack(PrevTok);
	    }
	  }

	  ParagraphComment *Paragraph;
	  if (EmptyParagraph)
	    Paragraph = S.actOnParagraphComment(None);
	  else {
	    BlockContentComment *Block = parseParagraphOrBlockCommand();
	    // Since we have checked for a block command, we should have parsed a
	    // paragraph.
	    Paragraph = cast<ParagraphComment>(Block);
	  }

	  if (PC) {
	    S.actOnParamCommandFinish(PC, Paragraph);
	    return PC;
	  } else if (TPC) {
	    S.actOnTParamCommandFinish(TPC, Paragraph);
	    return TPC;
	  } else {
	    S.actOnBlockCommandFinish(BC, Paragraph);
	    return BC;
	  }
	}

	/// Parses an inline command and, if available, its single word argument.
	///
	/// Expects Tok to be a backslash_command or an at_command token.  The
	/// command token is consumed, then one word is re-lexed from the following
	/// text tokens.  Sema receives the command with the argument when a word was
	/// found and without it otherwise.  Tokens that were fetched but not used
	/// are put back before returning.
	///
	/// \returns the inline command node created by Sema.
	InlineCommandComment *Parser::parseInlineCommand() {
	  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

	  const Token CommandTok = Tok;
	  consumeToken();

	  TextTokenRetokenizer Retokenizer(Allocator, *this);

	  Token ArgTok;
	  bool ArgTokValid = Retokenizer.lexWord(ArgTok);

	  InlineCommandComment *IC;
	  if (ArgTokValid) {
	    IC = S.actOnInlineCommand(CommandTok.getLocation(),
	                              CommandTok.getEndLocation(),
	                              CommandTok.getCommandID(),
	                              ArgTok.getLocation(),
	                              ArgTok.getEndLocation(),
	                              ArgTok.getText());
	  } else {
	    // No argument could be lexed; create the command without one.
	    IC = S.actOnInlineCommand(CommandTok.getLocation(),
	                              CommandTok.getEndLocation(),
	                              CommandTok.getCommandID());
	  }

	  Retokenizer.putBackLeftoverTokens();

	  return IC;
	}

	/// Parses an HTML start tag: the tag name followed by a list of attributes,
	/// each either a bare identifier or an identifier, an equals sign and a
	/// quoted string, terminated by html_greater or html_slash_greater (the
	/// latter marks the tag as self-closing).
	///
	/// Expects Tok to be an html_start_tag token.  Malformed input is diagnosed:
	///  - an identifier followed by '=' but no quoted string is recorded as a
	///    bare attribute, and stray '=' / quoted-string tokens are skipped;
	///  - stray '=' / quoted-string tokens where an identifier or the tag end
	///    was expected are skipped;
	///  - any other token ends the tag prematurely; the tag is then finished
	///    with a default-constructed location for the closing token.
	///
	/// \returns the start tag node, always finished via Sema.
	HTMLStartTagComment *Parser::parseHTMLStartTag() {
	  assert(Tok.is(tok::html_start_tag));
	  HTMLStartTagComment *HST =
	      S.actOnHTMLStartTagStart(Tok.getLocation(),
	                               Tok.getHTMLTagStartName());
	  consumeToken();

	  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
	  while (true) {
	    switch (Tok.getKind()) {
	    case tok::html_ident: {
	      Token Ident = Tok;
	      consumeToken();
	      if (Tok.isNot(tok::html_equals)) {
	        // Attribute without a value.
	        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
	                                                       Ident.getHTMLIdent()));
	        continue;
	      }
	      Token Equals = Tok;
	      consumeToken();
	      if (Tok.isNot(tok::html_quoted_string)) {
	        Diag(Tok.getLocation(),
	             diag::warn_doc_html_start_tag_expected_quoted_string)
	          << SourceRange(Equals.getLocation());
	        // Keep the attribute name, drop the broken value.
	        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
	                                                       Ident.getHTMLIdent()));
	        // Recover by skipping any run of '=' and quoted strings.
	        while (Tok.is(tok::html_equals) ||
	               Tok.is(tok::html_quoted_string))
	          consumeToken();
	        continue;
	      }
	      // Well-formed attribute with a quoted value.
	      Attrs.push_back(HTMLStartTagComment::Attribute(
	                              Ident.getLocation(),
	                              Ident.getHTMLIdent(),
	                              Equals.getLocation(),
	                              SourceRange(Tok.getLocation(),
	                                          Tok.getEndLocation()),
	                              Tok.getHTMLQuotedString()));
	      consumeToken();
	      continue;
	    }

	    case tok::html_greater:
	      S.actOnHTMLStartTagFinish(HST,
	                                S.copyArray(llvm::makeArrayRef(Attrs)),
	                                Tok.getLocation(),
	                                /* IsSelfClosing = */ false);
	      consumeToken();
	      return HST;

	    case tok::html_slash_greater:
	      S.actOnHTMLStartTagFinish(HST,
	                                S.copyArray(llvm::makeArrayRef(Attrs)),
	                                Tok.getLocation(),
	                                /* IsSelfClosing = */ true);
	      consumeToken();
	      return HST;

	    case tok::html_equals:
	    case tok::html_quoted_string:
	      Diag(Tok.getLocation(),
	           diag::warn_doc_html_start_tag_expected_ident_or_greater);
	      // Skip the stray tokens; resume parsing only if the tag continues.
	      while (Tok.is(tok::html_equals) ||
	             Tok.is(tok::html_quoted_string))
	        consumeToken();
	      if (Tok.is(tok::html_ident) ||
	          Tok.is(tok::html_greater) ||
	          Tok.is(tok::html_slash_greater))
	        continue;

	      S.actOnHTMLStartTagFinish(HST,
	                                S.copyArray(llvm::makeArrayRef(Attrs)),
	                                SourceLocation(),
	                                /* IsSelfClosing = */ false);
	      return HST;

	    default:
	      // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
	      S.actOnHTMLStartTagFinish(HST,
	                                S.copyArray(llvm::makeArrayRef(Attrs)),
	                                SourceLocation(),
	                                /* IsSelfClosing = */ false);
	      bool StartLineInvalid;
	      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
	                                                  HST->getLocation(),
	                                                  &StartLineInvalid);
	      bool EndLineInvalid;
	      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
	                                                  Tok.getLocation(),
	                                                  &EndLineInvalid);
	      // When the tag start and the offending token are on the same line (or
	      // a line number is unavailable), highlight the tag in the warning
	      // itself; otherwise add a separate note pointing at the tag start.
	      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
	        Diag(Tok.getLocation(),
	             diag::warn_doc_html_start_tag_expected_ident_or_greater)
	          << HST->getSourceRange();
	      else {
	        Diag(Tok.getLocation(),
	             diag::warn_doc_html_start_tag_expected_ident_or_greater);
	        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
	          << HST->getSourceRange();
	      }
	      return HST;
	    }
	  }
	}

	/// Parses an HTML end tag.
	///
	/// Expects Tok to be an html_end_tag token.  If it is followed by an
	/// html_greater token, that token is consumed and its location is passed to
	/// Sema; otherwise a default-constructed location is passed.
	HTMLEndTagComment *Parser::parseHTMLEndTag() {
	  assert(Tok.is(tok::html_end_tag));

	  Token EndTagTok = Tok;
	  consumeToken();

	  const bool HasGreater = Tok.is(tok::html_greater);
	  SourceLocation GreaterLoc =
	      HasGreater ? Tok.getLocation() : SourceLocation();
	  if (HasGreater)
	    consumeToken();

	  return S.actOnHTMLEndTag(EndTagTok.getLocation(),
	                           GreaterLoc,
	                           EndTagTok.getHTMLTagEndName());
	}

	/// Parses a paragraph of inline content, or a block command if one is the
	/// first thing encountered.
	///
	/// Inline content (text, inline commands, unknown commands, HTML tags) is
	/// accumulated until the paragraph ends.  A paragraph ends at:
	///  - a verbatim block, a verbatim line or the end of the comment;
	///  - a block command (if nothing has been accumulated yet, the block
	///    command itself is parsed and returned instead);
	///  - two newlines in a row, optionally separated by a whitespace-only text
	///    token, or a newline followed by the end of the comment.
	///
	/// A single newline inside the paragraph is recorded as a trailing newline
	/// on the preceding inline node.  A verbatim block end command without a
	/// matching start is diagnosed and dropped.
	///
	/// \returns the block command if one was parsed, otherwise the paragraph.
	BlockContentComment *Parser::parseParagraphOrBlockCommand() {
	  SmallVector<InlineContentComment *, 8> Content;

	  while (true) {
	    switch (Tok.getKind()) {
	    case tok::verbatim_block_begin:
	    case tok::verbatim_line_name:
	    case tok::eof:
	      assert(Content.size() != 0);
	      break; // Block content or EOF ahead, finish this paragraph.

	    case tok::unknown_command:
	      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
	                                              Tok.getEndLocation(),
	                                              Tok.getUnknownCommandName()));
	      consumeToken();
	      continue;

	    case tok::backslash_command:
	    case tok::at_command: {
	      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
	      if (Info->IsBlockCommand) {
	        if (Content.size() == 0)
	          return parseBlockCommand();
	        break; // Block command ahead, finish this paragraph.
	      }
	      if (Info->IsVerbatimBlockEndCommand) {
	        Diag(Tok.getLocation(),
	             diag::warn_verbatim_block_end_without_start)
	          << Tok.is(tok::at_command)
	          << Info->Name
	          << SourceRange(Tok.getLocation(), Tok.getEndLocation());
	        consumeToken();
	        continue;
	      }
	      if (Info->IsUnknownCommand) {
	        Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
	                                                Tok.getEndLocation(),
	                                                Info->getID()));
	        consumeToken();
	        continue;
	      }
	      assert(Info->IsInlineCommand);
	      Content.push_back(parseInlineCommand());
	      continue;
	    }

	    case tok::newline: {
	      consumeToken();
	      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
	        consumeToken();
	        break; // Two newlines -- end of paragraph.
	      }
	      // Also allow [tok::newline, tok::text, tok::newline] if the middle
	      // tok::text is just whitespace.
	      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
	        Token WhitespaceTok = Tok;
	        consumeToken();
	        if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
	          consumeToken();
	          break;
	        }
	        // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
	        putBack(WhitespaceTok);
	      }
	      if (Content.size() > 0)
	        Content.back()->addTrailingNewline();
	      continue;
	    }

	    // Don't deal with HTML tag soup now.
	    case tok::html_start_tag:
	      Content.push_back(parseHTMLStartTag());
	      continue;

	    case tok::html_end_tag:
	      Content.push_back(parseHTMLEndTag());
	      continue;

	    case tok::text:
	      Content.push_back(S.actOnText(Tok.getLocation(),
	                                    Tok.getEndLocation(),
	                                    Tok.getText()));
	      consumeToken();
	      continue;

	    case tok::verbatim_block_line:
	    case tok::verbatim_block_end:
	    case tok::verbatim_line_text:
	    case tok::html_ident:
	    case tok::html_equals:
	    case tok::html_quoted_string:
	    case tok::html_greater:
	    case tok::html_slash_greater:
	      llvm_unreachable("should not see this token");
	    }
	    // Reached only via a 'break' out of the switch: the paragraph is done.
	    break;
	  }

	  return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
	}

	/// Parses a verbatim block: the opening command, the lines of the block and
	/// the closing command, if present.
	///
	/// Expects Tok to be a verbatim_block_begin token.  A newline directly after
	/// the opening command is dropped.  Each verbatim_block_line token becomes
	/// one line (its following newline, if any, is consumed with it); a newline
	/// on its own becomes an empty line.  If no verbatim_block_end token
	/// follows, the block is finished with a default-constructed end location
	/// and an empty closing name.
	///
	/// \returns the verbatim block node, always finished via Sema.
	VerbatimBlockComment *Parser::parseVerbatimBlock() {
	  assert(Tok.is(tok::verbatim_block_begin));

	  VerbatimBlockComment *VB =
	      S.actOnVerbatimBlockStart(Tok.getLocation(),
	                                Tok.getVerbatimBlockID());
	  consumeToken();

	  // Don't create an empty line if verbatim opening command is followed
	  // by a newline.
	  if (Tok.is(tok::newline))
	    consumeToken();

	  SmallVector<VerbatimBlockLineComment *, 8> Lines;
	  while (Tok.is(tok::verbatim_block_line) ||
	         Tok.is(tok::newline)) {
	    VerbatimBlockLineComment *Line;
	    if (Tok.is(tok::verbatim_block_line)) {
	      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
	                                      Tok.getVerbatimBlockText());
	      consumeToken();
	      // The newline that terminates this line does not start a new one.
	      if (Tok.is(tok::newline)) {
	        consumeToken();
	      }
	    } else {
	      // Empty line, just a tok::newline.
	      Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
	      consumeToken();
	    }
	    Lines.push_back(Line);
	  }

	  if (Tok.is(tok::verbatim_block_end)) {
	    const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
	    S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
	                               Info->Name,
	                               S.copyArray(llvm::makeArrayRef(Lines)));
	    consumeToken();
	  } else {
	    // Unterminated verbatim block: no closing command was seen.
	    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
	                               S.copyArray(llvm::makeArrayRef(Lines)));
	  }

	  return VB;
	}

	/// Parses a verbatim line: the command name and the text after it.
	///
	/// Expects Tok to be a verbatim_line_name token.  The token after the name
	/// might not be a verbatim_line_text if the command comes just before a
	/// newline or the end of the comment; in that case the text is empty and is
	/// located at the end of the command name.
	VerbatimLineComment *Parser::parseVerbatimLine() {
	  assert(Tok.is(tok::verbatim_line_name));

	  Token NameTok = Tok;
	  consumeToken();

	  const bool HasText = Tok.is(tok::verbatim_line_text);
	  SourceLocation TextBegin =
	      HasText ? Tok.getLocation() : NameTok.getEndLocation();
	  StringRef Text = HasText ? Tok.getVerbatimLineText() : StringRef("");

	  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
	                                                NameTok.getVerbatimLineID(),
	                                                TextBegin,
	                                                Text);
	  // NOTE(review): the token after the name is consumed unconditionally,
	  // whether or not it was a verbatim_line_text -- confirm this is intended.
	  consumeToken();
	  return VL;
	}

	/// Parses one piece of block content, dispatching on the kind of the current
	/// token: a verbatim line, a verbatim block, or a paragraph / block command.
	///
	/// Token kinds that cannot begin block content (end of comment, newline, and
	/// tokens that only occur inside verbatim content or HTML tags) are treated
	/// as unreachable.
	BlockContentComment *Parser::parseBlockContent() {
	  switch (Tok.getKind()) {
	  case tok::verbatim_line_name:
	    return parseVerbatimLine();

	  case tok::verbatim_block_begin:
	    return parseVerbatimBlock();

	  // Everything that can start a paragraph or a block command.
	  case tok::html_end_tag:
	  case tok::html_start_tag:
	  case tok::at_command:
	  case tok::backslash_command:
	  case tok::unknown_command:
	  case tok::text:
	    return parseParagraphOrBlockCommand();

	  // The caller must never hand us one of these.
	  case tok::newline:
	  case tok::eof:
	  case tok::verbatim_line_text:
	  case tok::verbatim_block_end:
	  case tok::verbatim_block_line:
	  case tok::html_slash_greater:
	  case tok::html_greater:
	  case tok::html_quoted_string:
	  case tok::html_equals:
	  case tok::html_ident:
	    llvm_unreachable("should not see this token");
	  }
	  llvm_unreachable("bogus token kind");
	}

	/// Parses the whole comment into a FullComment.
	///
	/// Newlines at the beginning of the comment and between pieces of block
	/// content are skipped; block content is parsed until the end of the comment
	/// is reached, and the collected blocks are handed to Sema.
	FullComment *Parser::parseFullComment() {
	  SmallVector<BlockContentComment *, 8> Blocks;

	  for (;;) {
	    // Skip leading newlines, and extra newlines after each block.
	    while (Tok.is(tok::newline))
	      consumeToken();

	    if (Tok.is(tok::eof))
	      break;

	    Blocks.push_back(parseBlockContent());
	  }

	  return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
	}

	} // end namespace comments
	} // end namespace clang