Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 1 | //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "clang/AST/CommentParser.h" |
| 11 | #include "clang/AST/CommentSema.h" |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 12 | #include "clang/AST/CommentDiagnostic.h" |
| 13 | #include "clang/Basic/SourceManager.h" |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 14 | #include "llvm/Support/ErrorHandling.h" |
| 15 | |
| 16 | namespace clang { |
| 17 | namespace comments { |
| 18 | |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame^] | 19 | /// Re-lexes a sequence of tok::text tokens. |
| 20 | class TextTokenRetokenizer { |
| 21 | llvm::BumpPtrAllocator &Allocator; |
| 22 | static const unsigned MaxTokens = 16; |
| 23 | SmallVector<Token, MaxTokens> Toks; |
| 24 | |
| 25 | struct Position { |
| 26 | unsigned CurToken; |
| 27 | const char *BufferStart; |
| 28 | const char *BufferEnd; |
| 29 | const char *BufferPtr; |
| 30 | SourceLocation BufferStartLoc; |
| 31 | }; |
| 32 | |
| 33 | /// Current position in Toks. |
| 34 | Position Pos; |
| 35 | |
| 36 | bool isEnd() const { |
| 37 | return Pos.CurToken >= Toks.size(); |
| 38 | } |
| 39 | |
| 40 | /// Sets up the buffer pointers to point to current token. |
| 41 | void setupBuffer() { |
| 42 | assert(Pos.CurToken < Toks.size()); |
| 43 | const Token &Tok = Toks[Pos.CurToken]; |
| 44 | |
| 45 | Pos.BufferStart = Tok.getText().begin(); |
| 46 | Pos.BufferEnd = Tok.getText().end(); |
| 47 | Pos.BufferPtr = Pos.BufferStart; |
| 48 | Pos.BufferStartLoc = Tok.getLocation(); |
| 49 | } |
| 50 | |
| 51 | SourceLocation getSourceLocation() const { |
| 52 | const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart; |
| 53 | return Pos.BufferStartLoc.getLocWithOffset(CharNo); |
| 54 | } |
| 55 | |
| 56 | char peek() const { |
| 57 | assert(!isEnd()); |
| 58 | assert(Pos.BufferPtr != Pos.BufferEnd); |
| 59 | return *Pos.BufferPtr; |
| 60 | } |
| 61 | |
| 62 | void consumeChar() { |
| 63 | assert(!isEnd()); |
| 64 | assert(Pos.BufferPtr != Pos.BufferEnd); |
| 65 | Pos.BufferPtr++; |
| 66 | if (Pos.BufferPtr == Pos.BufferEnd) { |
| 67 | Pos.CurToken++; |
| 68 | if (Pos.CurToken < Toks.size()) |
| 69 | setupBuffer(); |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | static bool isWhitespace(char C) { |
| 74 | return C == ' ' || C == '\n' || C == '\r' || |
| 75 | C == '\t' || C == '\f' || C == '\v'; |
| 76 | } |
| 77 | |
| 78 | void consumeWhitespace() { |
| 79 | while (!isEnd()) { |
| 80 | if (isWhitespace(peek())) |
| 81 | consumeChar(); |
| 82 | else |
| 83 | break; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | void formTokenWithChars(Token &Result, |
| 88 | SourceLocation Loc, |
| 89 | const char *TokBegin, |
| 90 | unsigned TokLength, |
| 91 | StringRef Text) { |
| 92 | Result.setLocation(Loc); |
| 93 | Result.setKind(tok::text); |
| 94 | Result.setLength(TokLength); |
| 95 | #ifndef NDEBUG |
| 96 | Result.TextPtr1 = "<UNSET>"; |
| 97 | Result.TextLen1 = 7; |
| 98 | #endif |
| 99 | Result.setText(Text); |
| 100 | } |
| 101 | |
| 102 | public: |
| 103 | TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator): |
| 104 | Allocator(Allocator) { |
| 105 | Pos.CurToken = 0; |
| 106 | } |
| 107 | |
| 108 | /// Add a token. |
| 109 | /// Returns true on success, false if it seems like we have enough tokens. |
| 110 | bool addToken(const Token &Tok) { |
| 111 | assert(Tok.is(tok::text)); |
| 112 | if (Toks.size() >= MaxTokens) |
| 113 | return false; |
| 114 | |
| 115 | Toks.push_back(Tok); |
| 116 | if (Toks.size() == 1) |
| 117 | setupBuffer(); |
| 118 | return true; |
| 119 | } |
| 120 | |
| 121 | /// Extract a word -- sequence of non-whitespace characters. |
| 122 | bool lexWord(Token &Tok) { |
| 123 | if (isEnd()) |
| 124 | return false; |
| 125 | |
| 126 | Position SavedPos = Pos; |
| 127 | |
| 128 | consumeWhitespace(); |
| 129 | SmallString<32> WordText; |
| 130 | const char *WordBegin = Pos.BufferPtr; |
| 131 | SourceLocation Loc = getSourceLocation(); |
| 132 | while (!isEnd()) { |
| 133 | const char C = peek(); |
| 134 | if (!isWhitespace(C)) { |
| 135 | WordText.push_back(C); |
| 136 | consumeChar(); |
| 137 | } else |
| 138 | break; |
| 139 | } |
| 140 | const unsigned Length = WordText.size(); |
| 141 | if (Length == 0) { |
| 142 | Pos = SavedPos; |
| 143 | return false; |
| 144 | } |
| 145 | |
| 146 | char *TextPtr = Allocator.Allocate<char>(Length + 1); |
| 147 | |
| 148 | memcpy(TextPtr, WordText.c_str(), Length + 1); |
| 149 | StringRef Text = StringRef(TextPtr, Length); |
| 150 | |
| 151 | formTokenWithChars(Tok, Loc, WordBegin, |
| 152 | Pos.BufferPtr - WordBegin, Text); |
| 153 | return true; |
| 154 | } |
| 155 | |
| 156 | bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) { |
| 157 | if (isEnd()) |
| 158 | return false; |
| 159 | |
| 160 | Position SavedPos = Pos; |
| 161 | |
| 162 | consumeWhitespace(); |
| 163 | SmallString<32> WordText; |
| 164 | const char *WordBegin = Pos.BufferPtr; |
| 165 | SourceLocation Loc = getSourceLocation(); |
| 166 | bool Error = false; |
| 167 | if (!isEnd()) { |
| 168 | const char C = peek(); |
| 169 | if (C == OpenDelim) { |
| 170 | WordText.push_back(C); |
| 171 | consumeChar(); |
| 172 | } else |
| 173 | Error = true; |
| 174 | } |
| 175 | char C = '\0'; |
| 176 | while (!Error && !isEnd()) { |
| 177 | C = peek(); |
| 178 | WordText.push_back(C); |
| 179 | consumeChar(); |
| 180 | if (C == CloseDelim) |
| 181 | break; |
| 182 | } |
| 183 | if (!Error && C != CloseDelim) |
| 184 | Error = true; |
| 185 | |
| 186 | if (Error) { |
| 187 | Pos = SavedPos; |
| 188 | return false; |
| 189 | } |
| 190 | |
| 191 | const unsigned Length = WordText.size(); |
| 192 | char *TextPtr = Allocator.Allocate<char>(Length + 1); |
| 193 | |
| 194 | memcpy(TextPtr, WordText.c_str(), Length + 1); |
| 195 | StringRef Text = StringRef(TextPtr, Length); |
| 196 | |
| 197 | formTokenWithChars(Tok, Loc, WordBegin, |
| 198 | Pos.BufferPtr - WordBegin, Text); |
| 199 | return true; |
| 200 | } |
| 201 | |
| 202 | /// Return a text token. Useful to take tokens back. |
| 203 | bool lexText(Token &Tok) { |
| 204 | if (isEnd()) |
| 205 | return false; |
| 206 | |
| 207 | if (Pos.BufferPtr != Pos.BufferStart) |
| 208 | formTokenWithChars(Tok, getSourceLocation(), |
| 209 | Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, |
| 210 | StringRef(Pos.BufferPtr, |
| 211 | Pos.BufferEnd - Pos.BufferPtr)); |
| 212 | else |
| 213 | Tok = Toks[Pos.CurToken]; |
| 214 | |
| 215 | Pos.CurToken++; |
| 216 | if (Pos.CurToken < Toks.size()) |
| 217 | setupBuffer(); |
| 218 | return true; |
| 219 | } |
| 220 | }; |
| 221 | |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 222 | Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, |
| 223 | const SourceManager &SourceMgr, DiagnosticsEngine &Diags): |
| 224 | L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) { |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 225 | consumeToken(); |
| 226 | } |
| 227 | |
| 228 | ParamCommandComment *Parser::parseParamCommandArgs( |
| 229 | ParamCommandComment *PC, |
| 230 | TextTokenRetokenizer &Retokenizer) { |
| 231 | Token Arg; |
| 232 | // Check if argument looks like direction specification: [dir] |
| 233 | // e.g., [in], [out], [in,out] |
| 234 | if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 235 | PC = S.actOnParamCommandDirectionArg(PC, |
| 236 | Arg.getLocation(), |
| 237 | Arg.getEndLocation(), |
| 238 | Arg.getText()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 239 | |
| 240 | if (Retokenizer.lexWord(Arg)) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 241 | PC = S.actOnParamCommandParamNameArg(PC, |
| 242 | Arg.getLocation(), |
| 243 | Arg.getEndLocation(), |
| 244 | Arg.getText()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 245 | |
| 246 | return PC; |
| 247 | } |
| 248 | |
| 249 | BlockCommandComment *Parser::parseBlockCommandArgs( |
| 250 | BlockCommandComment *BC, |
| 251 | TextTokenRetokenizer &Retokenizer, |
| 252 | unsigned NumArgs) { |
| 253 | typedef BlockCommandComment::Argument Argument; |
Dmitri Gribenko | 814e219 | 2012-07-06 16:41:59 +0000 | [diff] [blame] | 254 | Argument *Args = |
| 255 | new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs]; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 256 | unsigned ParsedArgs = 0; |
| 257 | Token Arg; |
| 258 | while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { |
| 259 | Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), |
| 260 | Arg.getEndLocation()), |
| 261 | Arg.getText()); |
| 262 | ParsedArgs++; |
| 263 | } |
| 264 | |
| 265 | return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); |
| 266 | } |
| 267 | |
| 268 | BlockCommandComment *Parser::parseBlockCommand() { |
| 269 | assert(Tok.is(tok::command)); |
| 270 | |
| 271 | ParamCommandComment *PC; |
| 272 | BlockCommandComment *BC; |
| 273 | bool IsParam = false; |
| 274 | unsigned NumArgs = 0; |
| 275 | if (S.isParamCommand(Tok.getCommandName())) { |
| 276 | IsParam = true; |
| 277 | PC = S.actOnParamCommandStart(Tok.getLocation(), |
| 278 | Tok.getEndLocation(), |
| 279 | Tok.getCommandName()); |
| 280 | } else { |
| 281 | NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); |
| 282 | BC = S.actOnBlockCommandStart(Tok.getLocation(), |
| 283 | Tok.getEndLocation(), |
| 284 | Tok.getCommandName()); |
| 285 | } |
| 286 | consumeToken(); |
| 287 | |
| 288 | if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { |
| 289 | // Block command ahead. We can't nest block commands, so pretend that this |
| 290 | // command has an empty argument. |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 291 | ParagraphComment *PC = S.actOnParagraphComment( |
| 292 | ArrayRef<InlineContentComment *>()); |
| 293 | return S.actOnBlockCommandFinish(BC, PC); |
| 294 | } |
| 295 | |
| 296 | if (IsParam || NumArgs > 0) { |
| 297 | // In order to parse command arguments we need to retokenize a few |
| 298 | // following text tokens. |
| 299 | TextTokenRetokenizer Retokenizer(Allocator); |
| 300 | while (Tok.is(tok::text)) { |
| 301 | if (Retokenizer.addToken(Tok)) |
| 302 | consumeToken(); |
| 303 | } |
| 304 | |
| 305 | if (IsParam) |
| 306 | PC = parseParamCommandArgs(PC, Retokenizer); |
| 307 | else |
| 308 | BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); |
| 309 | |
| 310 | // Put back tokens we didn't use. |
Dmitri Gribenko | fd93916 | 2012-07-24 16:10:47 +0000 | [diff] [blame] | 311 | SmallVector<Token, 16> TextToks; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 312 | Token Text; |
Dmitri Gribenko | fd93916 | 2012-07-24 16:10:47 +0000 | [diff] [blame] | 313 | while (Retokenizer.lexText(Text)) { |
| 314 | TextToks.push_back(Text); |
| 315 | } |
| 316 | putBack(TextToks); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 317 | } |
| 318 | |
| 319 | BlockContentComment *Block = parseParagraphOrBlockCommand(); |
| 320 | // Since we have checked for a block command, we should have parsed a |
| 321 | // paragraph. |
| 322 | if (IsParam) |
| 323 | return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); |
| 324 | else |
| 325 | return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); |
| 326 | } |
| 327 | |
| 328 | InlineCommandComment *Parser::parseInlineCommand() { |
| 329 | assert(Tok.is(tok::command)); |
| 330 | |
| 331 | const Token CommandTok = Tok; |
| 332 | consumeToken(); |
| 333 | |
| 334 | TextTokenRetokenizer Retokenizer(Allocator); |
| 335 | while (Tok.is(tok::text)) { |
| 336 | if (Retokenizer.addToken(Tok)) |
| 337 | consumeToken(); |
| 338 | } |
| 339 | |
| 340 | Token ArgTok; |
| 341 | bool ArgTokValid = Retokenizer.lexWord(ArgTok); |
| 342 | |
| 343 | InlineCommandComment *IC; |
| 344 | if (ArgTokValid) { |
| 345 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 346 | CommandTok.getEndLocation(), |
| 347 | CommandTok.getCommandName(), |
| 348 | ArgTok.getLocation(), |
| 349 | ArgTok.getEndLocation(), |
| 350 | ArgTok.getText()); |
| 351 | } else { |
| 352 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 353 | CommandTok.getEndLocation(), |
| 354 | CommandTok.getCommandName()); |
| 355 | } |
| 356 | |
| 357 | Token Text; |
| 358 | while (Retokenizer.lexText(Text)) |
| 359 | putBack(Text); |
| 360 | |
| 361 | return IC; |
| 362 | } |
| 363 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 364 | HTMLStartTagComment *Parser::parseHTMLStartTag() { |
| 365 | assert(Tok.is(tok::html_start_tag)); |
| 366 | HTMLStartTagComment *HST = |
| 367 | S.actOnHTMLStartTagStart(Tok.getLocation(), |
| 368 | Tok.getHTMLTagStartName()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 369 | consumeToken(); |
| 370 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 371 | SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 372 | while (true) { |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 373 | switch (Tok.getKind()) { |
| 374 | case tok::html_ident: { |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 375 | Token Ident = Tok; |
| 376 | consumeToken(); |
| 377 | if (Tok.isNot(tok::html_equals)) { |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 378 | Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), |
| 379 | Ident.getHTMLIdent())); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 380 | continue; |
| 381 | } |
| 382 | Token Equals = Tok; |
| 383 | consumeToken(); |
| 384 | if (Tok.isNot(tok::html_quoted_string)) { |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 385 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 386 | diag::warn_doc_html_start_tag_expected_quoted_string) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 387 | << SourceRange(Equals.getLocation()); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 388 | Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), |
| 389 | Ident.getHTMLIdent())); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 390 | while (Tok.is(tok::html_equals) || |
| 391 | Tok.is(tok::html_quoted_string)) |
| 392 | consumeToken(); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 393 | continue; |
| 394 | } |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 395 | Attrs.push_back(HTMLStartTagComment::Attribute( |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 396 | Ident.getLocation(), |
| 397 | Ident.getHTMLIdent(), |
| 398 | Equals.getLocation(), |
| 399 | SourceRange(Tok.getLocation(), |
| 400 | Tok.getEndLocation()), |
| 401 | Tok.getHTMLQuotedString())); |
| 402 | consumeToken(); |
| 403 | continue; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 404 | } |
| 405 | |
| 406 | case tok::html_greater: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 407 | HST = S.actOnHTMLStartTagFinish(HST, |
| 408 | copyArray(llvm::makeArrayRef(Attrs)), |
| 409 | Tok.getLocation(), |
| 410 | /* IsSelfClosing = */ false); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 411 | consumeToken(); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 412 | return HST; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 413 | |
| 414 | case tok::html_slash_greater: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 415 | HST = S.actOnHTMLStartTagFinish(HST, |
| 416 | copyArray(llvm::makeArrayRef(Attrs)), |
| 417 | Tok.getLocation(), |
| 418 | /* IsSelfClosing = */ true); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 419 | consumeToken(); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 420 | return HST; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 421 | |
| 422 | case tok::html_equals: |
| 423 | case tok::html_quoted_string: |
| 424 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 425 | diag::warn_doc_html_start_tag_expected_ident_or_greater); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 426 | while (Tok.is(tok::html_equals) || |
| 427 | Tok.is(tok::html_quoted_string)) |
| 428 | consumeToken(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 429 | if (Tok.is(tok::html_ident) || |
| 430 | Tok.is(tok::html_greater) || |
| 431 | Tok.is(tok::html_slash_greater)) |
| 432 | continue; |
| 433 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 434 | return S.actOnHTMLStartTagFinish(HST, |
| 435 | copyArray(llvm::makeArrayRef(Attrs)), |
| 436 | SourceLocation(), |
| 437 | /* IsSelfClosing = */ false); |
| 438 | |
| 439 | default: |
| 440 | // Not a token from an HTML start tag. Thus HTML tag prematurely ended. |
| 441 | HST = S.actOnHTMLStartTagFinish(HST, |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 442 | copyArray(llvm::makeArrayRef(Attrs)), |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 443 | SourceLocation(), |
| 444 | /* IsSelfClosing = */ false); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 445 | bool StartLineInvalid; |
| 446 | const unsigned StartLine = SourceMgr.getPresumedLineNumber( |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 447 | HST->getLocation(), |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 448 | &StartLineInvalid); |
| 449 | bool EndLineInvalid; |
| 450 | const unsigned EndLine = SourceMgr.getPresumedLineNumber( |
| 451 | Tok.getLocation(), |
| 452 | &EndLineInvalid); |
| 453 | if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) |
| 454 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 455 | diag::warn_doc_html_start_tag_expected_ident_or_greater) |
| 456 | << HST->getSourceRange(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 457 | else { |
| 458 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 459 | diag::warn_doc_html_start_tag_expected_ident_or_greater); |
| 460 | Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) |
| 461 | << HST->getSourceRange(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 462 | } |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 463 | return HST; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 464 | } |
| 465 | } |
| 466 | } |
| 467 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 468 | HTMLEndTagComment *Parser::parseHTMLEndTag() { |
| 469 | assert(Tok.is(tok::html_end_tag)); |
| 470 | Token TokEndTag = Tok; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 471 | consumeToken(); |
| 472 | SourceLocation Loc; |
| 473 | if (Tok.is(tok::html_greater)) { |
| 474 | Loc = Tok.getLocation(); |
| 475 | consumeToken(); |
| 476 | } |
| 477 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 478 | return S.actOnHTMLEndTag(TokEndTag.getLocation(), |
| 479 | Loc, |
| 480 | TokEndTag.getHTMLTagEndName()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 481 | } |
| 482 | |
| 483 | BlockContentComment *Parser::parseParagraphOrBlockCommand() { |
| 484 | SmallVector<InlineContentComment *, 8> Content; |
| 485 | |
| 486 | while (true) { |
| 487 | switch (Tok.getKind()) { |
| 488 | case tok::verbatim_block_begin: |
| 489 | case tok::verbatim_line_name: |
| 490 | case tok::eof: |
| 491 | assert(Content.size() != 0); |
| 492 | break; // Block content or EOF ahead, finish this parapgaph. |
| 493 | |
| 494 | case tok::command: |
| 495 | if (S.isBlockCommand(Tok.getCommandName())) { |
| 496 | if (Content.size() == 0) |
| 497 | return parseBlockCommand(); |
| 498 | break; // Block command ahead, finish this parapgaph. |
| 499 | } |
| 500 | if (S.isInlineCommand(Tok.getCommandName())) { |
| 501 | Content.push_back(parseInlineCommand()); |
| 502 | continue; |
| 503 | } |
| 504 | |
| 505 | // Not a block command, not an inline command ==> an unknown command. |
| 506 | Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), |
| 507 | Tok.getEndLocation(), |
| 508 | Tok.getCommandName())); |
| 509 | consumeToken(); |
| 510 | continue; |
| 511 | |
| 512 | case tok::newline: { |
| 513 | consumeToken(); |
| 514 | if (Tok.is(tok::newline) || Tok.is(tok::eof)) { |
| 515 | consumeToken(); |
| 516 | break; // Two newlines -- end of paragraph. |
| 517 | } |
| 518 | if (Content.size() > 0) |
| 519 | Content.back()->addTrailingNewline(); |
| 520 | continue; |
| 521 | } |
| 522 | |
| 523 | // Don't deal with HTML tag soup now. |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 524 | case tok::html_start_tag: |
| 525 | Content.push_back(parseHTMLStartTag()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 526 | continue; |
| 527 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 528 | case tok::html_end_tag: |
| 529 | Content.push_back(parseHTMLEndTag()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 530 | continue; |
| 531 | |
| 532 | case tok::text: |
| 533 | Content.push_back(S.actOnText(Tok.getLocation(), |
| 534 | Tok.getEndLocation(), |
| 535 | Tok.getText())); |
| 536 | consumeToken(); |
| 537 | continue; |
| 538 | |
| 539 | case tok::verbatim_block_line: |
| 540 | case tok::verbatim_block_end: |
| 541 | case tok::verbatim_line_text: |
| 542 | case tok::html_ident: |
| 543 | case tok::html_equals: |
| 544 | case tok::html_quoted_string: |
| 545 | case tok::html_greater: |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 546 | case tok::html_slash_greater: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 547 | llvm_unreachable("should not see this token"); |
| 548 | } |
| 549 | break; |
| 550 | } |
| 551 | |
| 552 | return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); |
| 553 | } |
| 554 | |
| 555 | VerbatimBlockComment *Parser::parseVerbatimBlock() { |
| 556 | assert(Tok.is(tok::verbatim_block_begin)); |
| 557 | |
| 558 | VerbatimBlockComment *VB = |
| 559 | S.actOnVerbatimBlockStart(Tok.getLocation(), |
| 560 | Tok.getVerbatimBlockName()); |
| 561 | consumeToken(); |
| 562 | |
| 563 | // Don't create an empty line if verbatim opening command is followed |
| 564 | // by a newline. |
| 565 | if (Tok.is(tok::newline)) |
| 566 | consumeToken(); |
| 567 | |
| 568 | SmallVector<VerbatimBlockLineComment *, 8> Lines; |
| 569 | while (Tok.is(tok::verbatim_block_line) || |
| 570 | Tok.is(tok::newline)) { |
| 571 | VerbatimBlockLineComment *Line; |
| 572 | if (Tok.is(tok::verbatim_block_line)) { |
| 573 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), |
| 574 | Tok.getVerbatimBlockText()); |
| 575 | consumeToken(); |
| 576 | if (Tok.is(tok::newline)) { |
| 577 | consumeToken(); |
| 578 | } |
| 579 | } else { |
| 580 | // Empty line, just a tok::newline. |
Dmitri Gribenko | 94572c3 | 2012-07-18 21:27:38 +0000 | [diff] [blame] | 581 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 582 | consumeToken(); |
| 583 | } |
| 584 | Lines.push_back(Line); |
| 585 | } |
| 586 | |
Dmitri Gribenko | 9f08f49 | 2012-07-20 20:18:53 +0000 | [diff] [blame] | 587 | if (Tok.is(tok::verbatim_block_end)) { |
| 588 | VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), |
| 589 | Tok.getVerbatimBlockName(), |
| 590 | copyArray(llvm::makeArrayRef(Lines))); |
| 591 | consumeToken(); |
| 592 | } else { |
| 593 | // Unterminated \\verbatim block |
| 594 | VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", |
| 595 | copyArray(llvm::makeArrayRef(Lines))); |
| 596 | } |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 597 | |
| 598 | return VB; |
| 599 | } |
| 600 | |
| 601 | VerbatimLineComment *Parser::parseVerbatimLine() { |
| 602 | assert(Tok.is(tok::verbatim_line_name)); |
| 603 | |
| 604 | Token NameTok = Tok; |
| 605 | consumeToken(); |
| 606 | |
| 607 | SourceLocation TextBegin; |
| 608 | StringRef Text; |
| 609 | // Next token might not be a tok::verbatim_line_text if verbatim line |
| 610 | // starting command comes just before a newline or comment end. |
| 611 | if (Tok.is(tok::verbatim_line_text)) { |
| 612 | TextBegin = Tok.getLocation(); |
| 613 | Text = Tok.getVerbatimLineText(); |
| 614 | } else { |
| 615 | TextBegin = NameTok.getEndLocation(); |
| 616 | Text = ""; |
| 617 | } |
| 618 | |
| 619 | VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), |
| 620 | NameTok.getVerbatimLineName(), |
| 621 | TextBegin, |
| 622 | Text); |
| 623 | consumeToken(); |
| 624 | return VL; |
| 625 | } |
| 626 | |
| 627 | BlockContentComment *Parser::parseBlockContent() { |
| 628 | switch (Tok.getKind()) { |
| 629 | case tok::text: |
| 630 | case tok::command: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 631 | case tok::html_start_tag: |
| 632 | case tok::html_end_tag: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 633 | return parseParagraphOrBlockCommand(); |
| 634 | |
| 635 | case tok::verbatim_block_begin: |
| 636 | return parseVerbatimBlock(); |
| 637 | |
| 638 | case tok::verbatim_line_name: |
| 639 | return parseVerbatimLine(); |
| 640 | |
| 641 | case tok::eof: |
| 642 | case tok::newline: |
| 643 | case tok::verbatim_block_line: |
| 644 | case tok::verbatim_block_end: |
| 645 | case tok::verbatim_line_text: |
| 646 | case tok::html_ident: |
| 647 | case tok::html_equals: |
| 648 | case tok::html_quoted_string: |
| 649 | case tok::html_greater: |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 650 | case tok::html_slash_greater: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 651 | llvm_unreachable("should not see this token"); |
| 652 | } |
Matt Beaumont-Gay | 4d48b5c | 2012-07-06 21:13:09 +0000 | [diff] [blame] | 653 | llvm_unreachable("bogus token kind"); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 654 | } |
| 655 | |
| 656 | FullComment *Parser::parseFullComment() { |
| 657 | // Skip newlines at the beginning of the comment. |
| 658 | while (Tok.is(tok::newline)) |
| 659 | consumeToken(); |
| 660 | |
| 661 | SmallVector<BlockContentComment *, 8> Blocks; |
| 662 | while (Tok.isNot(tok::eof)) { |
| 663 | Blocks.push_back(parseBlockContent()); |
| 664 | |
| 665 | // Skip extra newlines after paragraph end. |
| 666 | while (Tok.is(tok::newline)) |
| 667 | consumeToken(); |
| 668 | } |
| 669 | return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); |
| 670 | } |
| 671 | |
| 672 | } // end namespace comments |
| 673 | } // end namespace clang |