Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 1 | //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "clang/AST/CommentParser.h" |
| 11 | #include "clang/AST/CommentSema.h" |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 12 | #include "clang/AST/CommentDiagnostic.h" |
| 13 | #include "clang/Basic/SourceManager.h" |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 14 | #include "llvm/Support/ErrorHandling.h" |
| 15 | |
| 16 | namespace clang { |
| 17 | namespace comments { |
| 18 | |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 19 | /// Re-lexes a sequence of tok::text tokens. |
| 20 | class TextTokenRetokenizer { |
| 21 | llvm::BumpPtrAllocator &Allocator; |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 22 | Parser &P; |
Dmitri Gribenko | 0c43a92 | 2012-07-24 18:23:31 +0000 | [diff] [blame] | 23 | |
| 24 | /// This flag is set when there are no more tokens we can fetch from lexer. |
| 25 | bool NoMoreInterestingTokens; |
| 26 | |
| 27 | /// Token buffer: tokens we have processed and lookahead. |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 28 | SmallVector<Token, 16> Toks; |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 29 | |
Dmitri Gribenko | 0c43a92 | 2012-07-24 18:23:31 +0000 | [diff] [blame] | 30 | /// A position in \c Toks. |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 31 | struct Position { |
| 32 | unsigned CurToken; |
| 33 | const char *BufferStart; |
| 34 | const char *BufferEnd; |
| 35 | const char *BufferPtr; |
| 36 | SourceLocation BufferStartLoc; |
| 37 | }; |
| 38 | |
| 39 | /// Current position in Toks. |
| 40 | Position Pos; |
| 41 | |
| 42 | bool isEnd() const { |
| 43 | return Pos.CurToken >= Toks.size(); |
| 44 | } |
| 45 | |
| 46 | /// Sets up the buffer pointers to point to current token. |
| 47 | void setupBuffer() { |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 48 | assert(!isEnd()); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 49 | const Token &Tok = Toks[Pos.CurToken]; |
| 50 | |
| 51 | Pos.BufferStart = Tok.getText().begin(); |
| 52 | Pos.BufferEnd = Tok.getText().end(); |
| 53 | Pos.BufferPtr = Pos.BufferStart; |
| 54 | Pos.BufferStartLoc = Tok.getLocation(); |
| 55 | } |
| 56 | |
| 57 | SourceLocation getSourceLocation() const { |
| 58 | const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart; |
| 59 | return Pos.BufferStartLoc.getLocWithOffset(CharNo); |
| 60 | } |
| 61 | |
| 62 | char peek() const { |
| 63 | assert(!isEnd()); |
| 64 | assert(Pos.BufferPtr != Pos.BufferEnd); |
| 65 | return *Pos.BufferPtr; |
| 66 | } |
| 67 | |
| 68 | void consumeChar() { |
| 69 | assert(!isEnd()); |
| 70 | assert(Pos.BufferPtr != Pos.BufferEnd); |
| 71 | Pos.BufferPtr++; |
| 72 | if (Pos.BufferPtr == Pos.BufferEnd) { |
| 73 | Pos.CurToken++; |
Dmitri Gribenko | 0c43a92 | 2012-07-24 18:23:31 +0000 | [diff] [blame] | 74 | if (isEnd() && !addToken()) |
| 75 | return; |
| 76 | |
| 77 | assert(!isEnd()); |
| 78 | setupBuffer(); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 79 | } |
| 80 | } |
| 81 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 82 | /// Add a token. |
| 83 | /// Returns true on success, false if there are no interesting tokens to |
| 84 | /// fetch from lexer. |
| 85 | bool addToken() { |
Dmitri Gribenko | 0c43a92 | 2012-07-24 18:23:31 +0000 | [diff] [blame] | 86 | if (NoMoreInterestingTokens) |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 87 | return false; |
| 88 | |
Dmitri Gribenko | 0c43a92 | 2012-07-24 18:23:31 +0000 | [diff] [blame] | 89 | if (P.Tok.is(tok::newline)) { |
| 90 | // If we see a single newline token between text tokens, skip it. |
| 91 | Token Newline = P.Tok; |
| 92 | P.consumeToken(); |
| 93 | if (P.Tok.isNot(tok::text)) { |
| 94 | P.putBack(Newline); |
| 95 | NoMoreInterestingTokens = true; |
| 96 | return false; |
| 97 | } |
| 98 | } |
| 99 | if (P.Tok.isNot(tok::text)) { |
| 100 | NoMoreInterestingTokens = true; |
| 101 | return false; |
| 102 | } |
| 103 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 104 | Toks.push_back(P.Tok); |
| 105 | P.consumeToken(); |
| 106 | if (Toks.size() == 1) |
| 107 | setupBuffer(); |
| 108 | return true; |
| 109 | } |
| 110 | |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 111 | static bool isWhitespace(char C) { |
| 112 | return C == ' ' || C == '\n' || C == '\r' || |
| 113 | C == '\t' || C == '\f' || C == '\v'; |
| 114 | } |
| 115 | |
| 116 | void consumeWhitespace() { |
| 117 | while (!isEnd()) { |
| 118 | if (isWhitespace(peek())) |
| 119 | consumeChar(); |
| 120 | else |
| 121 | break; |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | void formTokenWithChars(Token &Result, |
| 126 | SourceLocation Loc, |
| 127 | const char *TokBegin, |
| 128 | unsigned TokLength, |
| 129 | StringRef Text) { |
| 130 | Result.setLocation(Loc); |
| 131 | Result.setKind(tok::text); |
| 132 | Result.setLength(TokLength); |
| 133 | #ifndef NDEBUG |
| 134 | Result.TextPtr1 = "<UNSET>"; |
| 135 | Result.TextLen1 = 7; |
| 136 | #endif |
| 137 | Result.setText(Text); |
| 138 | } |
| 139 | |
| 140 | public: |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 141 | TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P): |
Dmitri Gribenko | 0c43a92 | 2012-07-24 18:23:31 +0000 | [diff] [blame] | 142 | Allocator(Allocator), P(P), NoMoreInterestingTokens(false) { |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 143 | Pos.CurToken = 0; |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 144 | addToken(); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 145 | } |
| 146 | |
| 147 | /// Extract a word -- sequence of non-whitespace characters. |
| 148 | bool lexWord(Token &Tok) { |
| 149 | if (isEnd()) |
| 150 | return false; |
| 151 | |
| 152 | Position SavedPos = Pos; |
| 153 | |
| 154 | consumeWhitespace(); |
| 155 | SmallString<32> WordText; |
| 156 | const char *WordBegin = Pos.BufferPtr; |
| 157 | SourceLocation Loc = getSourceLocation(); |
| 158 | while (!isEnd()) { |
| 159 | const char C = peek(); |
| 160 | if (!isWhitespace(C)) { |
| 161 | WordText.push_back(C); |
| 162 | consumeChar(); |
| 163 | } else |
| 164 | break; |
| 165 | } |
| 166 | const unsigned Length = WordText.size(); |
| 167 | if (Length == 0) { |
| 168 | Pos = SavedPos; |
| 169 | return false; |
| 170 | } |
| 171 | |
| 172 | char *TextPtr = Allocator.Allocate<char>(Length + 1); |
| 173 | |
| 174 | memcpy(TextPtr, WordText.c_str(), Length + 1); |
| 175 | StringRef Text = StringRef(TextPtr, Length); |
| 176 | |
| 177 | formTokenWithChars(Tok, Loc, WordBegin, |
| 178 | Pos.BufferPtr - WordBegin, Text); |
| 179 | return true; |
| 180 | } |
| 181 | |
| 182 | bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) { |
| 183 | if (isEnd()) |
| 184 | return false; |
| 185 | |
| 186 | Position SavedPos = Pos; |
| 187 | |
| 188 | consumeWhitespace(); |
| 189 | SmallString<32> WordText; |
| 190 | const char *WordBegin = Pos.BufferPtr; |
| 191 | SourceLocation Loc = getSourceLocation(); |
| 192 | bool Error = false; |
| 193 | if (!isEnd()) { |
| 194 | const char C = peek(); |
| 195 | if (C == OpenDelim) { |
| 196 | WordText.push_back(C); |
| 197 | consumeChar(); |
| 198 | } else |
| 199 | Error = true; |
| 200 | } |
| 201 | char C = '\0'; |
| 202 | while (!Error && !isEnd()) { |
| 203 | C = peek(); |
| 204 | WordText.push_back(C); |
| 205 | consumeChar(); |
| 206 | if (C == CloseDelim) |
| 207 | break; |
| 208 | } |
| 209 | if (!Error && C != CloseDelim) |
| 210 | Error = true; |
| 211 | |
| 212 | if (Error) { |
| 213 | Pos = SavedPos; |
| 214 | return false; |
| 215 | } |
| 216 | |
| 217 | const unsigned Length = WordText.size(); |
| 218 | char *TextPtr = Allocator.Allocate<char>(Length + 1); |
| 219 | |
| 220 | memcpy(TextPtr, WordText.c_str(), Length + 1); |
| 221 | StringRef Text = StringRef(TextPtr, Length); |
| 222 | |
| 223 | formTokenWithChars(Tok, Loc, WordBegin, |
| 224 | Pos.BufferPtr - WordBegin, Text); |
| 225 | return true; |
| 226 | } |
| 227 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 228 | /// Put back tokens that we didn't consume. |
| 229 | void putBackLeftoverTokens() { |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 230 | if (isEnd()) |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 231 | return; |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 232 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 233 | bool HavePartialTok = false; |
| 234 | Token PartialTok; |
| 235 | if (Pos.BufferPtr != Pos.BufferStart) { |
| 236 | formTokenWithChars(PartialTok, getSourceLocation(), |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 237 | Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, |
| 238 | StringRef(Pos.BufferPtr, |
| 239 | Pos.BufferEnd - Pos.BufferPtr)); |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 240 | HavePartialTok = true; |
| 241 | Pos.CurToken++; |
| 242 | } |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 243 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 244 | P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end())); |
| 245 | Pos.CurToken = Toks.size(); |
| 246 | |
| 247 | if (HavePartialTok) |
| 248 | P.putBack(PartialTok); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 249 | } |
| 250 | }; |
| 251 | |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 252 | Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, |
| 253 | const SourceManager &SourceMgr, DiagnosticsEngine &Diags): |
| 254 | L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) { |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 255 | consumeToken(); |
| 256 | } |
| 257 | |
| 258 | ParamCommandComment *Parser::parseParamCommandArgs( |
| 259 | ParamCommandComment *PC, |
| 260 | TextTokenRetokenizer &Retokenizer) { |
| 261 | Token Arg; |
| 262 | // Check if argument looks like direction specification: [dir] |
| 263 | // e.g., [in], [out], [in,out] |
| 264 | if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 265 | PC = S.actOnParamCommandDirectionArg(PC, |
| 266 | Arg.getLocation(), |
| 267 | Arg.getEndLocation(), |
| 268 | Arg.getText()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 269 | |
| 270 | if (Retokenizer.lexWord(Arg)) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 271 | PC = S.actOnParamCommandParamNameArg(PC, |
| 272 | Arg.getLocation(), |
| 273 | Arg.getEndLocation(), |
| 274 | Arg.getText()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 275 | |
| 276 | return PC; |
| 277 | } |
| 278 | |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 279 | TParamCommandComment *Parser::parseTParamCommandArgs( |
| 280 | TParamCommandComment *TPC, |
| 281 | TextTokenRetokenizer &Retokenizer) { |
| 282 | Token Arg; |
| 283 | if (Retokenizer.lexWord(Arg)) |
| 284 | TPC = S.actOnTParamCommandParamNameArg(TPC, |
| 285 | Arg.getLocation(), |
| 286 | Arg.getEndLocation(), |
| 287 | Arg.getText()); |
| 288 | |
| 289 | return TPC; |
| 290 | } |
| 291 | |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 292 | BlockCommandComment *Parser::parseBlockCommandArgs( |
| 293 | BlockCommandComment *BC, |
| 294 | TextTokenRetokenizer &Retokenizer, |
| 295 | unsigned NumArgs) { |
| 296 | typedef BlockCommandComment::Argument Argument; |
Dmitri Gribenko | 814e219 | 2012-07-06 16:41:59 +0000 | [diff] [blame] | 297 | Argument *Args = |
| 298 | new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs]; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 299 | unsigned ParsedArgs = 0; |
| 300 | Token Arg; |
| 301 | while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { |
| 302 | Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), |
| 303 | Arg.getEndLocation()), |
| 304 | Arg.getText()); |
| 305 | ParsedArgs++; |
| 306 | } |
| 307 | |
| 308 | return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); |
| 309 | } |
| 310 | |
| 311 | BlockCommandComment *Parser::parseBlockCommand() { |
| 312 | assert(Tok.is(tok::command)); |
| 313 | |
| 314 | ParamCommandComment *PC; |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 315 | TParamCommandComment *TPC; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 316 | BlockCommandComment *BC; |
| 317 | bool IsParam = false; |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 318 | bool IsTParam = false; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 319 | unsigned NumArgs = 0; |
| 320 | if (S.isParamCommand(Tok.getCommandName())) { |
| 321 | IsParam = true; |
| 322 | PC = S.actOnParamCommandStart(Tok.getLocation(), |
| 323 | Tok.getEndLocation(), |
| 324 | Tok.getCommandName()); |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 325 | } if (S.isTParamCommand(Tok.getCommandName())) { |
| 326 | IsTParam = true; |
| 327 | TPC = S.actOnTParamCommandStart(Tok.getLocation(), |
| 328 | Tok.getEndLocation(), |
| 329 | Tok.getCommandName()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 330 | } else { |
| 331 | NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); |
| 332 | BC = S.actOnBlockCommandStart(Tok.getLocation(), |
| 333 | Tok.getEndLocation(), |
| 334 | Tok.getCommandName()); |
| 335 | } |
| 336 | consumeToken(); |
| 337 | |
| 338 | if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { |
| 339 | // Block command ahead. We can't nest block commands, so pretend that this |
| 340 | // command has an empty argument. |
Dmitri Gribenko | e5deb79 | 2012-07-30 18:05:28 +0000 | [diff] [blame] | 341 | ParagraphComment *Paragraph = S.actOnParagraphComment( |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 342 | ArrayRef<InlineContentComment *>()); |
Dmitri Gribenko | e5deb79 | 2012-07-30 18:05:28 +0000 | [diff] [blame] | 343 | return S.actOnBlockCommandFinish(IsParam ? PC : BC, Paragraph); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 344 | } |
| 345 | |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 346 | if (IsParam || IsTParam || NumArgs > 0) { |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 347 | // In order to parse command arguments we need to retokenize a few |
| 348 | // following text tokens. |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 349 | TextTokenRetokenizer Retokenizer(Allocator, *this); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 350 | |
| 351 | if (IsParam) |
| 352 | PC = parseParamCommandArgs(PC, Retokenizer); |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 353 | else if (IsTParam) |
| 354 | TPC = parseTParamCommandArgs(TPC, Retokenizer); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 355 | else |
| 356 | BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); |
| 357 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 358 | Retokenizer.putBackLeftoverTokens(); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 359 | } |
| 360 | |
| 361 | BlockContentComment *Block = parseParagraphOrBlockCommand(); |
| 362 | // Since we have checked for a block command, we should have parsed a |
| 363 | // paragraph. |
| 364 | if (IsParam) |
| 365 | return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 366 | else if (IsTParam) |
| 367 | return S.actOnTParamCommandFinish(TPC, cast<ParagraphComment>(Block)); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 368 | else |
| 369 | return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); |
| 370 | } |
| 371 | |
| 372 | InlineCommandComment *Parser::parseInlineCommand() { |
| 373 | assert(Tok.is(tok::command)); |
| 374 | |
| 375 | const Token CommandTok = Tok; |
| 376 | consumeToken(); |
| 377 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 378 | TextTokenRetokenizer Retokenizer(Allocator, *this); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 379 | |
| 380 | Token ArgTok; |
| 381 | bool ArgTokValid = Retokenizer.lexWord(ArgTok); |
| 382 | |
| 383 | InlineCommandComment *IC; |
| 384 | if (ArgTokValid) { |
| 385 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 386 | CommandTok.getEndLocation(), |
| 387 | CommandTok.getCommandName(), |
| 388 | ArgTok.getLocation(), |
| 389 | ArgTok.getEndLocation(), |
| 390 | ArgTok.getText()); |
| 391 | } else { |
| 392 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 393 | CommandTok.getEndLocation(), |
| 394 | CommandTok.getCommandName()); |
| 395 | } |
| 396 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame] | 397 | Retokenizer.putBackLeftoverTokens(); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 398 | |
| 399 | return IC; |
| 400 | } |
| 401 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 402 | HTMLStartTagComment *Parser::parseHTMLStartTag() { |
| 403 | assert(Tok.is(tok::html_start_tag)); |
| 404 | HTMLStartTagComment *HST = |
| 405 | S.actOnHTMLStartTagStart(Tok.getLocation(), |
| 406 | Tok.getHTMLTagStartName()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 407 | consumeToken(); |
| 408 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 409 | SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 410 | while (true) { |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 411 | switch (Tok.getKind()) { |
| 412 | case tok::html_ident: { |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 413 | Token Ident = Tok; |
| 414 | consumeToken(); |
| 415 | if (Tok.isNot(tok::html_equals)) { |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 416 | Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), |
| 417 | Ident.getHTMLIdent())); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 418 | continue; |
| 419 | } |
| 420 | Token Equals = Tok; |
| 421 | consumeToken(); |
| 422 | if (Tok.isNot(tok::html_quoted_string)) { |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 423 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 424 | diag::warn_doc_html_start_tag_expected_quoted_string) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 425 | << SourceRange(Equals.getLocation()); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 426 | Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), |
| 427 | Ident.getHTMLIdent())); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 428 | while (Tok.is(tok::html_equals) || |
| 429 | Tok.is(tok::html_quoted_string)) |
| 430 | consumeToken(); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 431 | continue; |
| 432 | } |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 433 | Attrs.push_back(HTMLStartTagComment::Attribute( |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 434 | Ident.getLocation(), |
| 435 | Ident.getHTMLIdent(), |
| 436 | Equals.getLocation(), |
| 437 | SourceRange(Tok.getLocation(), |
| 438 | Tok.getEndLocation()), |
| 439 | Tok.getHTMLQuotedString())); |
| 440 | consumeToken(); |
| 441 | continue; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 442 | } |
| 443 | |
| 444 | case tok::html_greater: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 445 | HST = S.actOnHTMLStartTagFinish(HST, |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 446 | S.copyArray(llvm::makeArrayRef(Attrs)), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 447 | Tok.getLocation(), |
| 448 | /* IsSelfClosing = */ false); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 449 | consumeToken(); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 450 | return HST; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 451 | |
| 452 | case tok::html_slash_greater: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 453 | HST = S.actOnHTMLStartTagFinish(HST, |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 454 | S.copyArray(llvm::makeArrayRef(Attrs)), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 455 | Tok.getLocation(), |
| 456 | /* IsSelfClosing = */ true); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 457 | consumeToken(); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 458 | return HST; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 459 | |
| 460 | case tok::html_equals: |
| 461 | case tok::html_quoted_string: |
| 462 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 463 | diag::warn_doc_html_start_tag_expected_ident_or_greater); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 464 | while (Tok.is(tok::html_equals) || |
| 465 | Tok.is(tok::html_quoted_string)) |
| 466 | consumeToken(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 467 | if (Tok.is(tok::html_ident) || |
| 468 | Tok.is(tok::html_greater) || |
| 469 | Tok.is(tok::html_slash_greater)) |
| 470 | continue; |
| 471 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 472 | return S.actOnHTMLStartTagFinish(HST, |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 473 | S.copyArray(llvm::makeArrayRef(Attrs)), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 474 | SourceLocation(), |
| 475 | /* IsSelfClosing = */ false); |
| 476 | |
| 477 | default: |
| 478 | // Not a token from an HTML start tag. Thus HTML tag prematurely ended. |
| 479 | HST = S.actOnHTMLStartTagFinish(HST, |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 480 | S.copyArray(llvm::makeArrayRef(Attrs)), |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 481 | SourceLocation(), |
| 482 | /* IsSelfClosing = */ false); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 483 | bool StartLineInvalid; |
| 484 | const unsigned StartLine = SourceMgr.getPresumedLineNumber( |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 485 | HST->getLocation(), |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 486 | &StartLineInvalid); |
| 487 | bool EndLineInvalid; |
| 488 | const unsigned EndLine = SourceMgr.getPresumedLineNumber( |
| 489 | Tok.getLocation(), |
| 490 | &EndLineInvalid); |
| 491 | if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) |
| 492 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 493 | diag::warn_doc_html_start_tag_expected_ident_or_greater) |
| 494 | << HST->getSourceRange(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 495 | else { |
| 496 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 497 | diag::warn_doc_html_start_tag_expected_ident_or_greater); |
| 498 | Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) |
| 499 | << HST->getSourceRange(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 500 | } |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 501 | return HST; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 502 | } |
| 503 | } |
| 504 | } |
| 505 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 506 | HTMLEndTagComment *Parser::parseHTMLEndTag() { |
| 507 | assert(Tok.is(tok::html_end_tag)); |
| 508 | Token TokEndTag = Tok; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 509 | consumeToken(); |
| 510 | SourceLocation Loc; |
| 511 | if (Tok.is(tok::html_greater)) { |
| 512 | Loc = Tok.getLocation(); |
| 513 | consumeToken(); |
| 514 | } |
| 515 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 516 | return S.actOnHTMLEndTag(TokEndTag.getLocation(), |
| 517 | Loc, |
| 518 | TokEndTag.getHTMLTagEndName()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 519 | } |
| 520 | |
| 521 | BlockContentComment *Parser::parseParagraphOrBlockCommand() { |
| 522 | SmallVector<InlineContentComment *, 8> Content; |
| 523 | |
| 524 | while (true) { |
| 525 | switch (Tok.getKind()) { |
| 526 | case tok::verbatim_block_begin: |
| 527 | case tok::verbatim_line_name: |
| 528 | case tok::eof: |
| 529 | assert(Content.size() != 0); |
| 530 | break; // Block content or EOF ahead, finish this parapgaph. |
| 531 | |
| 532 | case tok::command: |
| 533 | if (S.isBlockCommand(Tok.getCommandName())) { |
| 534 | if (Content.size() == 0) |
| 535 | return parseBlockCommand(); |
| 536 | break; // Block command ahead, finish this parapgaph. |
| 537 | } |
| 538 | if (S.isInlineCommand(Tok.getCommandName())) { |
| 539 | Content.push_back(parseInlineCommand()); |
| 540 | continue; |
| 541 | } |
| 542 | |
| 543 | // Not a block command, not an inline command ==> an unknown command. |
| 544 | Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), |
| 545 | Tok.getEndLocation(), |
| 546 | Tok.getCommandName())); |
| 547 | consumeToken(); |
| 548 | continue; |
| 549 | |
| 550 | case tok::newline: { |
| 551 | consumeToken(); |
| 552 | if (Tok.is(tok::newline) || Tok.is(tok::eof)) { |
| 553 | consumeToken(); |
| 554 | break; // Two newlines -- end of paragraph. |
| 555 | } |
| 556 | if (Content.size() > 0) |
| 557 | Content.back()->addTrailingNewline(); |
| 558 | continue; |
| 559 | } |
| 560 | |
| 561 | // Don't deal with HTML tag soup now. |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 562 | case tok::html_start_tag: |
| 563 | Content.push_back(parseHTMLStartTag()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 564 | continue; |
| 565 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 566 | case tok::html_end_tag: |
| 567 | Content.push_back(parseHTMLEndTag()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 568 | continue; |
| 569 | |
| 570 | case tok::text: |
| 571 | Content.push_back(S.actOnText(Tok.getLocation(), |
| 572 | Tok.getEndLocation(), |
| 573 | Tok.getText())); |
| 574 | consumeToken(); |
| 575 | continue; |
| 576 | |
| 577 | case tok::verbatim_block_line: |
| 578 | case tok::verbatim_block_end: |
| 579 | case tok::verbatim_line_text: |
| 580 | case tok::html_ident: |
| 581 | case tok::html_equals: |
| 582 | case tok::html_quoted_string: |
| 583 | case tok::html_greater: |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 584 | case tok::html_slash_greater: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 585 | llvm_unreachable("should not see this token"); |
| 586 | } |
| 587 | break; |
| 588 | } |
| 589 | |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 590 | return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content))); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 591 | } |
| 592 | |
| 593 | VerbatimBlockComment *Parser::parseVerbatimBlock() { |
| 594 | assert(Tok.is(tok::verbatim_block_begin)); |
| 595 | |
| 596 | VerbatimBlockComment *VB = |
| 597 | S.actOnVerbatimBlockStart(Tok.getLocation(), |
| 598 | Tok.getVerbatimBlockName()); |
| 599 | consumeToken(); |
| 600 | |
| 601 | // Don't create an empty line if verbatim opening command is followed |
| 602 | // by a newline. |
| 603 | if (Tok.is(tok::newline)) |
| 604 | consumeToken(); |
| 605 | |
| 606 | SmallVector<VerbatimBlockLineComment *, 8> Lines; |
| 607 | while (Tok.is(tok::verbatim_block_line) || |
| 608 | Tok.is(tok::newline)) { |
| 609 | VerbatimBlockLineComment *Line; |
| 610 | if (Tok.is(tok::verbatim_block_line)) { |
| 611 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), |
| 612 | Tok.getVerbatimBlockText()); |
| 613 | consumeToken(); |
| 614 | if (Tok.is(tok::newline)) { |
| 615 | consumeToken(); |
| 616 | } |
| 617 | } else { |
| 618 | // Empty line, just a tok::newline. |
Dmitri Gribenko | 94572c3 | 2012-07-18 21:27:38 +0000 | [diff] [blame] | 619 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 620 | consumeToken(); |
| 621 | } |
| 622 | Lines.push_back(Line); |
| 623 | } |
| 624 | |
Dmitri Gribenko | 9f08f49 | 2012-07-20 20:18:53 +0000 | [diff] [blame] | 625 | if (Tok.is(tok::verbatim_block_end)) { |
| 626 | VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), |
| 627 | Tok.getVerbatimBlockName(), |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 628 | S.copyArray(llvm::makeArrayRef(Lines))); |
Dmitri Gribenko | 9f08f49 | 2012-07-20 20:18:53 +0000 | [diff] [blame] | 629 | consumeToken(); |
| 630 | } else { |
| 631 | // Unterminated \\verbatim block |
| 632 | VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 633 | S.copyArray(llvm::makeArrayRef(Lines))); |
Dmitri Gribenko | 9f08f49 | 2012-07-20 20:18:53 +0000 | [diff] [blame] | 634 | } |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 635 | |
| 636 | return VB; |
| 637 | } |
| 638 | |
| 639 | VerbatimLineComment *Parser::parseVerbatimLine() { |
| 640 | assert(Tok.is(tok::verbatim_line_name)); |
| 641 | |
| 642 | Token NameTok = Tok; |
| 643 | consumeToken(); |
| 644 | |
| 645 | SourceLocation TextBegin; |
| 646 | StringRef Text; |
| 647 | // Next token might not be a tok::verbatim_line_text if verbatim line |
| 648 | // starting command comes just before a newline or comment end. |
| 649 | if (Tok.is(tok::verbatim_line_text)) { |
| 650 | TextBegin = Tok.getLocation(); |
| 651 | Text = Tok.getVerbatimLineText(); |
| 652 | } else { |
| 653 | TextBegin = NameTok.getEndLocation(); |
| 654 | Text = ""; |
| 655 | } |
| 656 | |
| 657 | VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), |
| 658 | NameTok.getVerbatimLineName(), |
| 659 | TextBegin, |
| 660 | Text); |
| 661 | consumeToken(); |
| 662 | return VL; |
| 663 | } |
| 664 | |
| 665 | BlockContentComment *Parser::parseBlockContent() { |
| 666 | switch (Tok.getKind()) { |
| 667 | case tok::text: |
| 668 | case tok::command: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 669 | case tok::html_start_tag: |
| 670 | case tok::html_end_tag: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 671 | return parseParagraphOrBlockCommand(); |
| 672 | |
| 673 | case tok::verbatim_block_begin: |
| 674 | return parseVerbatimBlock(); |
| 675 | |
| 676 | case tok::verbatim_line_name: |
| 677 | return parseVerbatimLine(); |
| 678 | |
| 679 | case tok::eof: |
| 680 | case tok::newline: |
| 681 | case tok::verbatim_block_line: |
| 682 | case tok::verbatim_block_end: |
| 683 | case tok::verbatim_line_text: |
| 684 | case tok::html_ident: |
| 685 | case tok::html_equals: |
| 686 | case tok::html_quoted_string: |
| 687 | case tok::html_greater: |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 688 | case tok::html_slash_greater: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 689 | llvm_unreachable("should not see this token"); |
| 690 | } |
Matt Beaumont-Gay | 4d48b5c | 2012-07-06 21:13:09 +0000 | [diff] [blame] | 691 | llvm_unreachable("bogus token kind"); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 692 | } |
| 693 | |
| 694 | FullComment *Parser::parseFullComment() { |
| 695 | // Skip newlines at the beginning of the comment. |
| 696 | while (Tok.is(tok::newline)) |
| 697 | consumeToken(); |
| 698 | |
| 699 | SmallVector<BlockContentComment *, 8> Blocks; |
| 700 | while (Tok.isNot(tok::eof)) { |
| 701 | Blocks.push_back(parseBlockContent()); |
| 702 | |
| 703 | // Skip extra newlines after paragraph end. |
| 704 | while (Tok.is(tok::newline)) |
| 705 | consumeToken(); |
| 706 | } |
Dmitri Gribenko | 96b0986 | 2012-07-31 22:37:06 +0000 | [diff] [blame^] | 707 | return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks))); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 708 | } |
| 709 | |
| 710 | } // end namespace comments |
| 711 | } // end namespace clang |