Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame^] | 1 | //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "clang/AST/CommentParser.h" |
| 11 | #include "clang/AST/CommentSema.h" |
| 12 | #include "llvm/Support/ErrorHandling.h" |
| 13 | |
| 14 | namespace clang { |
| 15 | namespace comments { |
| 16 | |
| 17 | Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator): |
| 18 | L(L), S(S), Allocator(Allocator) { |
| 19 | consumeToken(); |
| 20 | } |
| 21 | |
| 22 | ParamCommandComment *Parser::parseParamCommandArgs( |
| 23 | ParamCommandComment *PC, |
| 24 | TextTokenRetokenizer &Retokenizer) { |
| 25 | Token Arg; |
| 26 | // Check if argument looks like direction specification: [dir] |
| 27 | // e.g., [in], [out], [in,out] |
| 28 | if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) |
| 29 | PC = S.actOnParamCommandArg(PC, |
| 30 | Arg.getLocation(), |
| 31 | Arg.getEndLocation(), |
| 32 | Arg.getText(), |
| 33 | /* IsDirection = */ true); |
| 34 | |
| 35 | if (Retokenizer.lexWord(Arg)) |
| 36 | StringRef ArgText = Arg.getText(); |
| 37 | PC = S.actOnParamCommandArg(PC, |
| 38 | Arg.getLocation(), |
| 39 | Arg.getEndLocation(), |
| 40 | Arg.getText(), |
| 41 | /* IsDirection = */ false); |
| 42 | |
| 43 | return PC; |
| 44 | } |
| 45 | |
| 46 | BlockCommandComment *Parser::parseBlockCommandArgs( |
| 47 | BlockCommandComment *BC, |
| 48 | TextTokenRetokenizer &Retokenizer, |
| 49 | unsigned NumArgs) { |
| 50 | typedef BlockCommandComment::Argument Argument; |
| 51 | Argument *Args = new (Allocator) Argument[NumArgs]; |
| 52 | unsigned ParsedArgs = 0; |
| 53 | Token Arg; |
| 54 | while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { |
| 55 | Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), |
| 56 | Arg.getEndLocation()), |
| 57 | Arg.getText()); |
| 58 | ParsedArgs++; |
| 59 | } |
| 60 | |
| 61 | return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); |
| 62 | } |
| 63 | |
| 64 | BlockCommandComment *Parser::parseBlockCommand() { |
| 65 | assert(Tok.is(tok::command)); |
| 66 | |
| 67 | ParamCommandComment *PC; |
| 68 | BlockCommandComment *BC; |
| 69 | bool IsParam = false; |
| 70 | unsigned NumArgs = 0; |
| 71 | if (S.isParamCommand(Tok.getCommandName())) { |
| 72 | IsParam = true; |
| 73 | PC = S.actOnParamCommandStart(Tok.getLocation(), |
| 74 | Tok.getEndLocation(), |
| 75 | Tok.getCommandName()); |
| 76 | } else { |
| 77 | NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); |
| 78 | BC = S.actOnBlockCommandStart(Tok.getLocation(), |
| 79 | Tok.getEndLocation(), |
| 80 | Tok.getCommandName()); |
| 81 | } |
| 82 | consumeToken(); |
| 83 | |
| 84 | if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { |
| 85 | // Block command ahead. We can't nest block commands, so pretend that this |
| 86 | // command has an empty argument. |
| 87 | // TODO: Diag() Warn empty arg to block command |
| 88 | ParagraphComment *PC = S.actOnParagraphComment( |
| 89 | ArrayRef<InlineContentComment *>()); |
| 90 | return S.actOnBlockCommandFinish(BC, PC); |
| 91 | } |
| 92 | |
| 93 | if (IsParam || NumArgs > 0) { |
| 94 | // In order to parse command arguments we need to retokenize a few |
| 95 | // following text tokens. |
| 96 | TextTokenRetokenizer Retokenizer(Allocator); |
| 97 | while (Tok.is(tok::text)) { |
| 98 | if (Retokenizer.addToken(Tok)) |
| 99 | consumeToken(); |
| 100 | } |
| 101 | |
| 102 | if (IsParam) |
| 103 | PC = parseParamCommandArgs(PC, Retokenizer); |
| 104 | else |
| 105 | BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); |
| 106 | |
| 107 | // Put back tokens we didn't use. |
| 108 | Token Text; |
| 109 | while (Retokenizer.lexText(Text)) |
| 110 | putBack(Text); |
| 111 | } |
| 112 | |
| 113 | BlockContentComment *Block = parseParagraphOrBlockCommand(); |
| 114 | // Since we have checked for a block command, we should have parsed a |
| 115 | // paragraph. |
| 116 | if (IsParam) |
| 117 | return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); |
| 118 | else |
| 119 | return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); |
| 120 | } |
| 121 | |
| 122 | InlineCommandComment *Parser::parseInlineCommand() { |
| 123 | assert(Tok.is(tok::command)); |
| 124 | |
| 125 | const Token CommandTok = Tok; |
| 126 | consumeToken(); |
| 127 | |
| 128 | TextTokenRetokenizer Retokenizer(Allocator); |
| 129 | while (Tok.is(tok::text)) { |
| 130 | if (Retokenizer.addToken(Tok)) |
| 131 | consumeToken(); |
| 132 | } |
| 133 | |
| 134 | Token ArgTok; |
| 135 | bool ArgTokValid = Retokenizer.lexWord(ArgTok); |
| 136 | |
| 137 | InlineCommandComment *IC; |
| 138 | if (ArgTokValid) { |
| 139 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 140 | CommandTok.getEndLocation(), |
| 141 | CommandTok.getCommandName(), |
| 142 | ArgTok.getLocation(), |
| 143 | ArgTok.getEndLocation(), |
| 144 | ArgTok.getText()); |
| 145 | } else { |
| 146 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 147 | CommandTok.getEndLocation(), |
| 148 | CommandTok.getCommandName()); |
| 149 | } |
| 150 | |
| 151 | Token Text; |
| 152 | while (Retokenizer.lexText(Text)) |
| 153 | putBack(Text); |
| 154 | |
| 155 | return IC; |
| 156 | } |
| 157 | |
| 158 | HTMLOpenTagComment *Parser::parseHTMLOpenTag() { |
| 159 | assert(Tok.is(tok::html_tag_open)); |
| 160 | HTMLOpenTagComment *HOT = |
| 161 | S.actOnHTMLOpenTagStart(Tok.getLocation(), |
| 162 | Tok.getHTMLTagOpenName()); |
| 163 | consumeToken(); |
| 164 | |
| 165 | SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs; |
| 166 | while (true) { |
| 167 | if (Tok.is(tok::html_ident)) { |
| 168 | Token Ident = Tok; |
| 169 | consumeToken(); |
| 170 | if (Tok.isNot(tok::html_equals)) { |
| 171 | Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), |
| 172 | Ident.getHTMLIdent())); |
| 173 | continue; |
| 174 | } |
| 175 | Token Equals = Tok; |
| 176 | consumeToken(); |
| 177 | if (Tok.isNot(tok::html_quoted_string)) { |
| 178 | // TODO: Diag() expected quoted string |
| 179 | Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), |
| 180 | Ident.getHTMLIdent())); |
| 181 | continue; |
| 182 | } |
| 183 | Attrs.push_back(HTMLOpenTagComment::Attribute( |
| 184 | Ident.getLocation(), |
| 185 | Ident.getHTMLIdent(), |
| 186 | Equals.getLocation(), |
| 187 | SourceRange(Tok.getLocation(), |
| 188 | Tok.getEndLocation()), |
| 189 | Tok.getHTMLQuotedString())); |
| 190 | consumeToken(); |
| 191 | continue; |
| 192 | } else if (Tok.is(tok::html_greater)) { |
| 193 | HOT = S.actOnHTMLOpenTagFinish(HOT, |
| 194 | copyArray(llvm::makeArrayRef(Attrs)), |
| 195 | Tok.getLocation()); |
| 196 | consumeToken(); |
| 197 | return HOT; |
| 198 | } else if (Tok.is(tok::html_equals) || |
| 199 | Tok.is(tok::html_quoted_string)) { |
| 200 | // TODO: Diag() Err expected ident |
| 201 | while (Tok.is(tok::html_equals) || |
| 202 | Tok.is(tok::html_quoted_string)) |
| 203 | consumeToken(); |
| 204 | } else { |
| 205 | // Not a token from HTML open tag. Thus HTML tag prematurely ended. |
| 206 | // TODO: Diag() Err HTML tag prematurely ended |
| 207 | return S.actOnHTMLOpenTagFinish(HOT, |
| 208 | copyArray(llvm::makeArrayRef(Attrs)), |
| 209 | SourceLocation()); |
| 210 | } |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | HTMLCloseTagComment *Parser::parseHTMLCloseTag() { |
| 215 | assert(Tok.is(tok::html_tag_close)); |
| 216 | Token TokTagOpen = Tok; |
| 217 | consumeToken(); |
| 218 | SourceLocation Loc; |
| 219 | if (Tok.is(tok::html_greater)) { |
| 220 | Loc = Tok.getLocation(); |
| 221 | consumeToken(); |
| 222 | } |
| 223 | |
| 224 | return S.actOnHTMLCloseTag(TokTagOpen.getLocation(), |
| 225 | Loc, |
| 226 | TokTagOpen.getHTMLTagCloseName()); |
| 227 | } |
| 228 | |
| 229 | BlockContentComment *Parser::parseParagraphOrBlockCommand() { |
| 230 | SmallVector<InlineContentComment *, 8> Content; |
| 231 | |
| 232 | while (true) { |
| 233 | switch (Tok.getKind()) { |
| 234 | case tok::verbatim_block_begin: |
| 235 | case tok::verbatim_line_name: |
| 236 | case tok::eof: |
| 237 | assert(Content.size() != 0); |
| 238 | break; // Block content or EOF ahead, finish this parapgaph. |
| 239 | |
| 240 | case tok::command: |
| 241 | if (S.isBlockCommand(Tok.getCommandName())) { |
| 242 | if (Content.size() == 0) |
| 243 | return parseBlockCommand(); |
| 244 | break; // Block command ahead, finish this parapgaph. |
| 245 | } |
| 246 | if (S.isInlineCommand(Tok.getCommandName())) { |
| 247 | Content.push_back(parseInlineCommand()); |
| 248 | continue; |
| 249 | } |
| 250 | |
| 251 | // Not a block command, not an inline command ==> an unknown command. |
| 252 | Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), |
| 253 | Tok.getEndLocation(), |
| 254 | Tok.getCommandName())); |
| 255 | consumeToken(); |
| 256 | continue; |
| 257 | |
| 258 | case tok::newline: { |
| 259 | consumeToken(); |
| 260 | if (Tok.is(tok::newline) || Tok.is(tok::eof)) { |
| 261 | consumeToken(); |
| 262 | break; // Two newlines -- end of paragraph. |
| 263 | } |
| 264 | if (Content.size() > 0) |
| 265 | Content.back()->addTrailingNewline(); |
| 266 | continue; |
| 267 | } |
| 268 | |
| 269 | // Don't deal with HTML tag soup now. |
| 270 | case tok::html_tag_open: |
| 271 | Content.push_back(parseHTMLOpenTag()); |
| 272 | continue; |
| 273 | |
| 274 | case tok::html_tag_close: |
| 275 | Content.push_back(parseHTMLCloseTag()); |
| 276 | continue; |
| 277 | |
| 278 | case tok::text: |
| 279 | Content.push_back(S.actOnText(Tok.getLocation(), |
| 280 | Tok.getEndLocation(), |
| 281 | Tok.getText())); |
| 282 | consumeToken(); |
| 283 | continue; |
| 284 | |
| 285 | case tok::verbatim_block_line: |
| 286 | case tok::verbatim_block_end: |
| 287 | case tok::verbatim_line_text: |
| 288 | case tok::html_ident: |
| 289 | case tok::html_equals: |
| 290 | case tok::html_quoted_string: |
| 291 | case tok::html_greater: |
| 292 | llvm_unreachable("should not see this token"); |
| 293 | } |
| 294 | break; |
| 295 | } |
| 296 | |
| 297 | return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); |
| 298 | } |
| 299 | |
| 300 | VerbatimBlockComment *Parser::parseVerbatimBlock() { |
| 301 | assert(Tok.is(tok::verbatim_block_begin)); |
| 302 | |
| 303 | VerbatimBlockComment *VB = |
| 304 | S.actOnVerbatimBlockStart(Tok.getLocation(), |
| 305 | Tok.getVerbatimBlockName()); |
| 306 | consumeToken(); |
| 307 | |
| 308 | // Don't create an empty line if verbatim opening command is followed |
| 309 | // by a newline. |
| 310 | if (Tok.is(tok::newline)) |
| 311 | consumeToken(); |
| 312 | |
| 313 | SmallVector<VerbatimBlockLineComment *, 8> Lines; |
| 314 | while (Tok.is(tok::verbatim_block_line) || |
| 315 | Tok.is(tok::newline)) { |
| 316 | VerbatimBlockLineComment *Line; |
| 317 | if (Tok.is(tok::verbatim_block_line)) { |
| 318 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), |
| 319 | Tok.getVerbatimBlockText()); |
| 320 | consumeToken(); |
| 321 | if (Tok.is(tok::newline)) { |
| 322 | consumeToken(); |
| 323 | } |
| 324 | } else { |
| 325 | // Empty line, just a tok::newline. |
| 326 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), |
| 327 | ""); |
| 328 | consumeToken(); |
| 329 | } |
| 330 | Lines.push_back(Line); |
| 331 | } |
| 332 | |
| 333 | assert(Tok.is(tok::verbatim_block_end)); |
| 334 | VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), |
| 335 | Tok.getVerbatimBlockName(), |
| 336 | copyArray(llvm::makeArrayRef(Lines))); |
| 337 | consumeToken(); |
| 338 | |
| 339 | return VB; |
| 340 | } |
| 341 | |
| 342 | VerbatimLineComment *Parser::parseVerbatimLine() { |
| 343 | assert(Tok.is(tok::verbatim_line_name)); |
| 344 | |
| 345 | Token NameTok = Tok; |
| 346 | consumeToken(); |
| 347 | |
| 348 | SourceLocation TextBegin; |
| 349 | StringRef Text; |
| 350 | // Next token might not be a tok::verbatim_line_text if verbatim line |
| 351 | // starting command comes just before a newline or comment end. |
| 352 | if (Tok.is(tok::verbatim_line_text)) { |
| 353 | TextBegin = Tok.getLocation(); |
| 354 | Text = Tok.getVerbatimLineText(); |
| 355 | } else { |
| 356 | TextBegin = NameTok.getEndLocation(); |
| 357 | Text = ""; |
| 358 | } |
| 359 | |
| 360 | VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), |
| 361 | NameTok.getVerbatimLineName(), |
| 362 | TextBegin, |
| 363 | Text); |
| 364 | consumeToken(); |
| 365 | return VL; |
| 366 | } |
| 367 | |
| 368 | BlockContentComment *Parser::parseBlockContent() { |
| 369 | switch (Tok.getKind()) { |
| 370 | case tok::text: |
| 371 | case tok::command: |
| 372 | case tok::html_tag_open: |
| 373 | case tok::html_tag_close: |
| 374 | return parseParagraphOrBlockCommand(); |
| 375 | |
| 376 | case tok::verbatim_block_begin: |
| 377 | return parseVerbatimBlock(); |
| 378 | |
| 379 | case tok::verbatim_line_name: |
| 380 | return parseVerbatimLine(); |
| 381 | |
| 382 | case tok::eof: |
| 383 | case tok::newline: |
| 384 | case tok::verbatim_block_line: |
| 385 | case tok::verbatim_block_end: |
| 386 | case tok::verbatim_line_text: |
| 387 | case tok::html_ident: |
| 388 | case tok::html_equals: |
| 389 | case tok::html_quoted_string: |
| 390 | case tok::html_greater: |
| 391 | llvm_unreachable("should not see this token"); |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | FullComment *Parser::parseFullComment() { |
| 396 | // Skip newlines at the beginning of the comment. |
| 397 | while (Tok.is(tok::newline)) |
| 398 | consumeToken(); |
| 399 | |
| 400 | SmallVector<BlockContentComment *, 8> Blocks; |
| 401 | while (Tok.isNot(tok::eof)) { |
| 402 | Blocks.push_back(parseBlockContent()); |
| 403 | |
| 404 | // Skip extra newlines after paragraph end. |
| 405 | while (Tok.is(tok::newline)) |
| 406 | consumeToken(); |
| 407 | } |
| 408 | return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); |
| 409 | } |
| 410 | |
| 411 | } // end namespace comments |
| 412 | } // end namespace clang |
| 413 | |
| 414 | |