Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 1 | //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "clang/AST/CommentParser.h" |
| 11 | #include "clang/AST/CommentSema.h" |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 12 | #include "clang/AST/CommentDiagnostic.h" |
| 13 | #include "clang/Basic/SourceManager.h" |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 14 | #include "llvm/Support/ErrorHandling.h" |
| 15 | |
| 16 | namespace clang { |
| 17 | namespace comments { |
| 18 | |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 19 | /// Re-lexes a sequence of tok::text tokens. |
| 20 | class TextTokenRetokenizer { |
| 21 | llvm::BumpPtrAllocator &Allocator; |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 22 | Parser &P; |
| 23 | SmallVector<Token, 16> Toks; |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 24 | |
| 25 | struct Position { |
| 26 | unsigned CurToken; |
| 27 | const char *BufferStart; |
| 28 | const char *BufferEnd; |
| 29 | const char *BufferPtr; |
| 30 | SourceLocation BufferStartLoc; |
| 31 | }; |
| 32 | |
| 33 | /// Current position in Toks. |
| 34 | Position Pos; |
| 35 | |
| 36 | bool isEnd() const { |
| 37 | return Pos.CurToken >= Toks.size(); |
| 38 | } |
| 39 | |
| 40 | /// Sets up the buffer pointers to point to current token. |
| 41 | void setupBuffer() { |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 42 | assert(!isEnd()); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 43 | const Token &Tok = Toks[Pos.CurToken]; |
| 44 | |
| 45 | Pos.BufferStart = Tok.getText().begin(); |
| 46 | Pos.BufferEnd = Tok.getText().end(); |
| 47 | Pos.BufferPtr = Pos.BufferStart; |
| 48 | Pos.BufferStartLoc = Tok.getLocation(); |
| 49 | } |
| 50 | |
| 51 | SourceLocation getSourceLocation() const { |
| 52 | const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart; |
| 53 | return Pos.BufferStartLoc.getLocWithOffset(CharNo); |
| 54 | } |
| 55 | |
| 56 | char peek() const { |
| 57 | assert(!isEnd()); |
| 58 | assert(Pos.BufferPtr != Pos.BufferEnd); |
| 59 | return *Pos.BufferPtr; |
| 60 | } |
| 61 | |
| 62 | void consumeChar() { |
| 63 | assert(!isEnd()); |
| 64 | assert(Pos.BufferPtr != Pos.BufferEnd); |
| 65 | Pos.BufferPtr++; |
| 66 | if (Pos.BufferPtr == Pos.BufferEnd) { |
| 67 | Pos.CurToken++; |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 68 | if (isEnd() && addToken()) { |
| 69 | assert(!isEnd()); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 70 | setupBuffer(); |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 71 | } |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 72 | } |
| 73 | } |
| 74 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 75 | /// Add a token. |
| 76 | /// Returns true on success, false if there are no interesting tokens to |
| 77 | /// fetch from lexer. |
| 78 | bool addToken() { |
| 79 | if (P.Tok.isNot(tok::text)) |
| 80 | return false; |
| 81 | |
| 82 | Toks.push_back(P.Tok); |
| 83 | P.consumeToken(); |
| 84 | if (Toks.size() == 1) |
| 85 | setupBuffer(); |
| 86 | return true; |
| 87 | } |
| 88 | |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 89 | static bool isWhitespace(char C) { |
| 90 | return C == ' ' || C == '\n' || C == '\r' || |
| 91 | C == '\t' || C == '\f' || C == '\v'; |
| 92 | } |
| 93 | |
| 94 | void consumeWhitespace() { |
| 95 | while (!isEnd()) { |
| 96 | if (isWhitespace(peek())) |
| 97 | consumeChar(); |
| 98 | else |
| 99 | break; |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | void formTokenWithChars(Token &Result, |
| 104 | SourceLocation Loc, |
| 105 | const char *TokBegin, |
| 106 | unsigned TokLength, |
| 107 | StringRef Text) { |
| 108 | Result.setLocation(Loc); |
| 109 | Result.setKind(tok::text); |
| 110 | Result.setLength(TokLength); |
| 111 | #ifndef NDEBUG |
| 112 | Result.TextPtr1 = "<UNSET>"; |
| 113 | Result.TextLen1 = 7; |
| 114 | #endif |
| 115 | Result.setText(Text); |
| 116 | } |
| 117 | |
| 118 | public: |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 119 | TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P): |
| 120 | Allocator(Allocator), P(P) { |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 121 | Pos.CurToken = 0; |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 122 | addToken(); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 123 | } |
| 124 | |
| 125 | /// Extract a word -- sequence of non-whitespace characters. |
| 126 | bool lexWord(Token &Tok) { |
| 127 | if (isEnd()) |
| 128 | return false; |
| 129 | |
| 130 | Position SavedPos = Pos; |
| 131 | |
| 132 | consumeWhitespace(); |
| 133 | SmallString<32> WordText; |
| 134 | const char *WordBegin = Pos.BufferPtr; |
| 135 | SourceLocation Loc = getSourceLocation(); |
| 136 | while (!isEnd()) { |
| 137 | const char C = peek(); |
| 138 | if (!isWhitespace(C)) { |
| 139 | WordText.push_back(C); |
| 140 | consumeChar(); |
| 141 | } else |
| 142 | break; |
| 143 | } |
| 144 | const unsigned Length = WordText.size(); |
| 145 | if (Length == 0) { |
| 146 | Pos = SavedPos; |
| 147 | return false; |
| 148 | } |
| 149 | |
| 150 | char *TextPtr = Allocator.Allocate<char>(Length + 1); |
| 151 | |
| 152 | memcpy(TextPtr, WordText.c_str(), Length + 1); |
| 153 | StringRef Text = StringRef(TextPtr, Length); |
| 154 | |
| 155 | formTokenWithChars(Tok, Loc, WordBegin, |
| 156 | Pos.BufferPtr - WordBegin, Text); |
| 157 | return true; |
| 158 | } |
| 159 | |
| 160 | bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) { |
| 161 | if (isEnd()) |
| 162 | return false; |
| 163 | |
| 164 | Position SavedPos = Pos; |
| 165 | |
| 166 | consumeWhitespace(); |
| 167 | SmallString<32> WordText; |
| 168 | const char *WordBegin = Pos.BufferPtr; |
| 169 | SourceLocation Loc = getSourceLocation(); |
| 170 | bool Error = false; |
| 171 | if (!isEnd()) { |
| 172 | const char C = peek(); |
| 173 | if (C == OpenDelim) { |
| 174 | WordText.push_back(C); |
| 175 | consumeChar(); |
| 176 | } else |
| 177 | Error = true; |
| 178 | } |
| 179 | char C = '\0'; |
| 180 | while (!Error && !isEnd()) { |
| 181 | C = peek(); |
| 182 | WordText.push_back(C); |
| 183 | consumeChar(); |
| 184 | if (C == CloseDelim) |
| 185 | break; |
| 186 | } |
| 187 | if (!Error && C != CloseDelim) |
| 188 | Error = true; |
| 189 | |
| 190 | if (Error) { |
| 191 | Pos = SavedPos; |
| 192 | return false; |
| 193 | } |
| 194 | |
| 195 | const unsigned Length = WordText.size(); |
| 196 | char *TextPtr = Allocator.Allocate<char>(Length + 1); |
| 197 | |
| 198 | memcpy(TextPtr, WordText.c_str(), Length + 1); |
| 199 | StringRef Text = StringRef(TextPtr, Length); |
| 200 | |
| 201 | formTokenWithChars(Tok, Loc, WordBegin, |
| 202 | Pos.BufferPtr - WordBegin, Text); |
| 203 | return true; |
| 204 | } |
| 205 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 206 | /// Put back tokens that we didn't consume. |
| 207 | void putBackLeftoverTokens() { |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 208 | if (isEnd()) |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 209 | return; |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 210 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 211 | bool HavePartialTok = false; |
| 212 | Token PartialTok; |
| 213 | if (Pos.BufferPtr != Pos.BufferStart) { |
| 214 | formTokenWithChars(PartialTok, getSourceLocation(), |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 215 | Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, |
| 216 | StringRef(Pos.BufferPtr, |
| 217 | Pos.BufferEnd - Pos.BufferPtr)); |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 218 | HavePartialTok = true; |
| 219 | Pos.CurToken++; |
| 220 | } |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 221 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 222 | P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end())); |
| 223 | Pos.CurToken = Toks.size(); |
| 224 | |
| 225 | if (HavePartialTok) |
| 226 | P.putBack(PartialTok); |
Dmitri Gribenko | c4b0f9b | 2012-07-24 17:43:18 +0000 | [diff] [blame] | 227 | } |
| 228 | }; |
| 229 | |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 230 | Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, |
| 231 | const SourceManager &SourceMgr, DiagnosticsEngine &Diags): |
| 232 | L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) { |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 233 | consumeToken(); |
| 234 | } |
| 235 | |
| 236 | ParamCommandComment *Parser::parseParamCommandArgs( |
| 237 | ParamCommandComment *PC, |
| 238 | TextTokenRetokenizer &Retokenizer) { |
| 239 | Token Arg; |
| 240 | // Check if argument looks like direction specification: [dir] |
| 241 | // e.g., [in], [out], [in,out] |
| 242 | if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 243 | PC = S.actOnParamCommandDirectionArg(PC, |
| 244 | Arg.getLocation(), |
| 245 | Arg.getEndLocation(), |
| 246 | Arg.getText()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 247 | |
| 248 | if (Retokenizer.lexWord(Arg)) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 249 | PC = S.actOnParamCommandParamNameArg(PC, |
| 250 | Arg.getLocation(), |
| 251 | Arg.getEndLocation(), |
| 252 | Arg.getText()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 253 | |
| 254 | return PC; |
| 255 | } |
| 256 | |
| 257 | BlockCommandComment *Parser::parseBlockCommandArgs( |
| 258 | BlockCommandComment *BC, |
| 259 | TextTokenRetokenizer &Retokenizer, |
| 260 | unsigned NumArgs) { |
| 261 | typedef BlockCommandComment::Argument Argument; |
Dmitri Gribenko | 814e219 | 2012-07-06 16:41:59 +0000 | [diff] [blame] | 262 | Argument *Args = |
| 263 | new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs]; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 264 | unsigned ParsedArgs = 0; |
| 265 | Token Arg; |
| 266 | while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { |
| 267 | Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), |
| 268 | Arg.getEndLocation()), |
| 269 | Arg.getText()); |
| 270 | ParsedArgs++; |
| 271 | } |
| 272 | |
| 273 | return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); |
| 274 | } |
| 275 | |
| 276 | BlockCommandComment *Parser::parseBlockCommand() { |
| 277 | assert(Tok.is(tok::command)); |
| 278 | |
| 279 | ParamCommandComment *PC; |
| 280 | BlockCommandComment *BC; |
| 281 | bool IsParam = false; |
| 282 | unsigned NumArgs = 0; |
| 283 | if (S.isParamCommand(Tok.getCommandName())) { |
| 284 | IsParam = true; |
| 285 | PC = S.actOnParamCommandStart(Tok.getLocation(), |
| 286 | Tok.getEndLocation(), |
| 287 | Tok.getCommandName()); |
| 288 | } else { |
| 289 | NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); |
| 290 | BC = S.actOnBlockCommandStart(Tok.getLocation(), |
| 291 | Tok.getEndLocation(), |
| 292 | Tok.getCommandName()); |
| 293 | } |
| 294 | consumeToken(); |
| 295 | |
| 296 | if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { |
| 297 | // Block command ahead. We can't nest block commands, so pretend that this |
| 298 | // command has an empty argument. |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 299 | ParagraphComment *PC = S.actOnParagraphComment( |
| 300 | ArrayRef<InlineContentComment *>()); |
| 301 | return S.actOnBlockCommandFinish(BC, PC); |
| 302 | } |
| 303 | |
| 304 | if (IsParam || NumArgs > 0) { |
| 305 | // In order to parse command arguments we need to retokenize a few |
| 306 | // following text tokens. |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 307 | TextTokenRetokenizer Retokenizer(Allocator, *this); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 308 | |
| 309 | if (IsParam) |
| 310 | PC = parseParamCommandArgs(PC, Retokenizer); |
| 311 | else |
| 312 | BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); |
| 313 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 314 | Retokenizer.putBackLeftoverTokens(); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 315 | } |
| 316 | |
| 317 | BlockContentComment *Block = parseParagraphOrBlockCommand(); |
| 318 | // Since we have checked for a block command, we should have parsed a |
| 319 | // paragraph. |
| 320 | if (IsParam) |
| 321 | return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); |
| 322 | else |
| 323 | return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); |
| 324 | } |
| 325 | |
| 326 | InlineCommandComment *Parser::parseInlineCommand() { |
| 327 | assert(Tok.is(tok::command)); |
| 328 | |
| 329 | const Token CommandTok = Tok; |
| 330 | consumeToken(); |
| 331 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 332 | TextTokenRetokenizer Retokenizer(Allocator, *this); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 333 | |
| 334 | Token ArgTok; |
| 335 | bool ArgTokValid = Retokenizer.lexWord(ArgTok); |
| 336 | |
| 337 | InlineCommandComment *IC; |
| 338 | if (ArgTokValid) { |
| 339 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 340 | CommandTok.getEndLocation(), |
| 341 | CommandTok.getCommandName(), |
| 342 | ArgTok.getLocation(), |
| 343 | ArgTok.getEndLocation(), |
| 344 | ArgTok.getText()); |
| 345 | } else { |
| 346 | IC = S.actOnInlineCommand(CommandTok.getLocation(), |
| 347 | CommandTok.getEndLocation(), |
| 348 | CommandTok.getCommandName()); |
| 349 | } |
| 350 | |
Dmitri Gribenko | db13f04 | 2012-07-24 17:52:18 +0000 | [diff] [blame^] | 351 | Retokenizer.putBackLeftoverTokens(); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 352 | |
| 353 | return IC; |
| 354 | } |
| 355 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 356 | HTMLStartTagComment *Parser::parseHTMLStartTag() { |
| 357 | assert(Tok.is(tok::html_start_tag)); |
| 358 | HTMLStartTagComment *HST = |
| 359 | S.actOnHTMLStartTagStart(Tok.getLocation(), |
| 360 | Tok.getHTMLTagStartName()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 361 | consumeToken(); |
| 362 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 363 | SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 364 | while (true) { |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 365 | switch (Tok.getKind()) { |
| 366 | case tok::html_ident: { |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 367 | Token Ident = Tok; |
| 368 | consumeToken(); |
| 369 | if (Tok.isNot(tok::html_equals)) { |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 370 | Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), |
| 371 | Ident.getHTMLIdent())); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 372 | continue; |
| 373 | } |
| 374 | Token Equals = Tok; |
| 375 | consumeToken(); |
| 376 | if (Tok.isNot(tok::html_quoted_string)) { |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 377 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 378 | diag::warn_doc_html_start_tag_expected_quoted_string) |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 379 | << SourceRange(Equals.getLocation()); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 380 | Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), |
| 381 | Ident.getHTMLIdent())); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 382 | while (Tok.is(tok::html_equals) || |
| 383 | Tok.is(tok::html_quoted_string)) |
| 384 | consumeToken(); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 385 | continue; |
| 386 | } |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 387 | Attrs.push_back(HTMLStartTagComment::Attribute( |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 388 | Ident.getLocation(), |
| 389 | Ident.getHTMLIdent(), |
| 390 | Equals.getLocation(), |
| 391 | SourceRange(Tok.getLocation(), |
| 392 | Tok.getEndLocation()), |
| 393 | Tok.getHTMLQuotedString())); |
| 394 | consumeToken(); |
| 395 | continue; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 396 | } |
| 397 | |
| 398 | case tok::html_greater: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 399 | HST = S.actOnHTMLStartTagFinish(HST, |
| 400 | copyArray(llvm::makeArrayRef(Attrs)), |
| 401 | Tok.getLocation(), |
| 402 | /* IsSelfClosing = */ false); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 403 | consumeToken(); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 404 | return HST; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 405 | |
| 406 | case tok::html_slash_greater: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 407 | HST = S.actOnHTMLStartTagFinish(HST, |
| 408 | copyArray(llvm::makeArrayRef(Attrs)), |
| 409 | Tok.getLocation(), |
| 410 | /* IsSelfClosing = */ true); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 411 | consumeToken(); |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 412 | return HST; |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 413 | |
| 414 | case tok::html_equals: |
| 415 | case tok::html_quoted_string: |
| 416 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 417 | diag::warn_doc_html_start_tag_expected_ident_or_greater); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 418 | while (Tok.is(tok::html_equals) || |
| 419 | Tok.is(tok::html_quoted_string)) |
| 420 | consumeToken(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 421 | if (Tok.is(tok::html_ident) || |
| 422 | Tok.is(tok::html_greater) || |
| 423 | Tok.is(tok::html_slash_greater)) |
| 424 | continue; |
| 425 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 426 | return S.actOnHTMLStartTagFinish(HST, |
| 427 | copyArray(llvm::makeArrayRef(Attrs)), |
| 428 | SourceLocation(), |
| 429 | /* IsSelfClosing = */ false); |
| 430 | |
| 431 | default: |
| 432 | // Not a token from an HTML start tag. Thus HTML tag prematurely ended. |
| 433 | HST = S.actOnHTMLStartTagFinish(HST, |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 434 | copyArray(llvm::makeArrayRef(Attrs)), |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 435 | SourceLocation(), |
| 436 | /* IsSelfClosing = */ false); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 437 | bool StartLineInvalid; |
| 438 | const unsigned StartLine = SourceMgr.getPresumedLineNumber( |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 439 | HST->getLocation(), |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 440 | &StartLineInvalid); |
| 441 | bool EndLineInvalid; |
| 442 | const unsigned EndLine = SourceMgr.getPresumedLineNumber( |
| 443 | Tok.getLocation(), |
| 444 | &EndLineInvalid); |
| 445 | if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) |
| 446 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 447 | diag::warn_doc_html_start_tag_expected_ident_or_greater) |
| 448 | << HST->getSourceRange(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 449 | else { |
| 450 | Diag(Tok.getLocation(), |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 451 | diag::warn_doc_html_start_tag_expected_ident_or_greater); |
| 452 | Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) |
| 453 | << HST->getSourceRange(); |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 454 | } |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 455 | return HST; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 456 | } |
| 457 | } |
| 458 | } |
| 459 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 460 | HTMLEndTagComment *Parser::parseHTMLEndTag() { |
| 461 | assert(Tok.is(tok::html_end_tag)); |
| 462 | Token TokEndTag = Tok; |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 463 | consumeToken(); |
| 464 | SourceLocation Loc; |
| 465 | if (Tok.is(tok::html_greater)) { |
| 466 | Loc = Tok.getLocation(); |
| 467 | consumeToken(); |
| 468 | } |
| 469 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 470 | return S.actOnHTMLEndTag(TokEndTag.getLocation(), |
| 471 | Loc, |
| 472 | TokEndTag.getHTMLTagEndName()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 473 | } |
| 474 | |
| 475 | BlockContentComment *Parser::parseParagraphOrBlockCommand() { |
| 476 | SmallVector<InlineContentComment *, 8> Content; |
| 477 | |
| 478 | while (true) { |
| 479 | switch (Tok.getKind()) { |
| 480 | case tok::verbatim_block_begin: |
| 481 | case tok::verbatim_line_name: |
| 482 | case tok::eof: |
| 483 | assert(Content.size() != 0); |
| 484 | break; // Block content or EOF ahead, finish this parapgaph. |
| 485 | |
| 486 | case tok::command: |
| 487 | if (S.isBlockCommand(Tok.getCommandName())) { |
| 488 | if (Content.size() == 0) |
| 489 | return parseBlockCommand(); |
| 490 | break; // Block command ahead, finish this parapgaph. |
| 491 | } |
| 492 | if (S.isInlineCommand(Tok.getCommandName())) { |
| 493 | Content.push_back(parseInlineCommand()); |
| 494 | continue; |
| 495 | } |
| 496 | |
| 497 | // Not a block command, not an inline command ==> an unknown command. |
| 498 | Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), |
| 499 | Tok.getEndLocation(), |
| 500 | Tok.getCommandName())); |
| 501 | consumeToken(); |
| 502 | continue; |
| 503 | |
| 504 | case tok::newline: { |
| 505 | consumeToken(); |
| 506 | if (Tok.is(tok::newline) || Tok.is(tok::eof)) { |
| 507 | consumeToken(); |
| 508 | break; // Two newlines -- end of paragraph. |
| 509 | } |
| 510 | if (Content.size() > 0) |
| 511 | Content.back()->addTrailingNewline(); |
| 512 | continue; |
| 513 | } |
| 514 | |
| 515 | // Don't deal with HTML tag soup now. |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 516 | case tok::html_start_tag: |
| 517 | Content.push_back(parseHTMLStartTag()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 518 | continue; |
| 519 | |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 520 | case tok::html_end_tag: |
| 521 | Content.push_back(parseHTMLEndTag()); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 522 | continue; |
| 523 | |
| 524 | case tok::text: |
| 525 | Content.push_back(S.actOnText(Tok.getLocation(), |
| 526 | Tok.getEndLocation(), |
| 527 | Tok.getText())); |
| 528 | consumeToken(); |
| 529 | continue; |
| 530 | |
| 531 | case tok::verbatim_block_line: |
| 532 | case tok::verbatim_block_end: |
| 533 | case tok::verbatim_line_text: |
| 534 | case tok::html_ident: |
| 535 | case tok::html_equals: |
| 536 | case tok::html_quoted_string: |
| 537 | case tok::html_greater: |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 538 | case tok::html_slash_greater: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 539 | llvm_unreachable("should not see this token"); |
| 540 | } |
| 541 | break; |
| 542 | } |
| 543 | |
| 544 | return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); |
| 545 | } |
| 546 | |
| 547 | VerbatimBlockComment *Parser::parseVerbatimBlock() { |
| 548 | assert(Tok.is(tok::verbatim_block_begin)); |
| 549 | |
| 550 | VerbatimBlockComment *VB = |
| 551 | S.actOnVerbatimBlockStart(Tok.getLocation(), |
| 552 | Tok.getVerbatimBlockName()); |
| 553 | consumeToken(); |
| 554 | |
| 555 | // Don't create an empty line if verbatim opening command is followed |
| 556 | // by a newline. |
| 557 | if (Tok.is(tok::newline)) |
| 558 | consumeToken(); |
| 559 | |
| 560 | SmallVector<VerbatimBlockLineComment *, 8> Lines; |
| 561 | while (Tok.is(tok::verbatim_block_line) || |
| 562 | Tok.is(tok::newline)) { |
| 563 | VerbatimBlockLineComment *Line; |
| 564 | if (Tok.is(tok::verbatim_block_line)) { |
| 565 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), |
| 566 | Tok.getVerbatimBlockText()); |
| 567 | consumeToken(); |
| 568 | if (Tok.is(tok::newline)) { |
| 569 | consumeToken(); |
| 570 | } |
| 571 | } else { |
| 572 | // Empty line, just a tok::newline. |
Dmitri Gribenko | 94572c3 | 2012-07-18 21:27:38 +0000 | [diff] [blame] | 573 | Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 574 | consumeToken(); |
| 575 | } |
| 576 | Lines.push_back(Line); |
| 577 | } |
| 578 | |
Dmitri Gribenko | 9f08f49 | 2012-07-20 20:18:53 +0000 | [diff] [blame] | 579 | if (Tok.is(tok::verbatim_block_end)) { |
| 580 | VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), |
| 581 | Tok.getVerbatimBlockName(), |
| 582 | copyArray(llvm::makeArrayRef(Lines))); |
| 583 | consumeToken(); |
| 584 | } else { |
| 585 | // Unterminated \\verbatim block |
| 586 | VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", |
| 587 | copyArray(llvm::makeArrayRef(Lines))); |
| 588 | } |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 589 | |
| 590 | return VB; |
| 591 | } |
| 592 | |
| 593 | VerbatimLineComment *Parser::parseVerbatimLine() { |
| 594 | assert(Tok.is(tok::verbatim_line_name)); |
| 595 | |
| 596 | Token NameTok = Tok; |
| 597 | consumeToken(); |
| 598 | |
| 599 | SourceLocation TextBegin; |
| 600 | StringRef Text; |
| 601 | // Next token might not be a tok::verbatim_line_text if verbatim line |
| 602 | // starting command comes just before a newline or comment end. |
| 603 | if (Tok.is(tok::verbatim_line_text)) { |
| 604 | TextBegin = Tok.getLocation(); |
| 605 | Text = Tok.getVerbatimLineText(); |
| 606 | } else { |
| 607 | TextBegin = NameTok.getEndLocation(); |
| 608 | Text = ""; |
| 609 | } |
| 610 | |
| 611 | VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), |
| 612 | NameTok.getVerbatimLineName(), |
| 613 | TextBegin, |
| 614 | Text); |
| 615 | consumeToken(); |
| 616 | return VL; |
| 617 | } |
| 618 | |
| 619 | BlockContentComment *Parser::parseBlockContent() { |
| 620 | switch (Tok.getKind()) { |
| 621 | case tok::text: |
| 622 | case tok::command: |
Dmitri Gribenko | 3f38bf2 | 2012-07-13 00:44:24 +0000 | [diff] [blame] | 623 | case tok::html_start_tag: |
| 624 | case tok::html_end_tag: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 625 | return parseParagraphOrBlockCommand(); |
| 626 | |
| 627 | case tok::verbatim_block_begin: |
| 628 | return parseVerbatimBlock(); |
| 629 | |
| 630 | case tok::verbatim_line_name: |
| 631 | return parseVerbatimLine(); |
| 632 | |
| 633 | case tok::eof: |
| 634 | case tok::newline: |
| 635 | case tok::verbatim_block_line: |
| 636 | case tok::verbatim_block_end: |
| 637 | case tok::verbatim_line_text: |
| 638 | case tok::html_ident: |
| 639 | case tok::html_equals: |
| 640 | case tok::html_quoted_string: |
| 641 | case tok::html_greater: |
Dmitri Gribenko | a5ef44f | 2012-07-11 21:38:39 +0000 | [diff] [blame] | 642 | case tok::html_slash_greater: |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 643 | llvm_unreachable("should not see this token"); |
| 644 | } |
Matt Beaumont-Gay | 4d48b5c | 2012-07-06 21:13:09 +0000 | [diff] [blame] | 645 | llvm_unreachable("bogus token kind"); |
Dmitri Gribenko | 8d3ba23 | 2012-07-06 00:28:32 +0000 | [diff] [blame] | 646 | } |
| 647 | |
| 648 | FullComment *Parser::parseFullComment() { |
| 649 | // Skip newlines at the beginning of the comment. |
| 650 | while (Tok.is(tok::newline)) |
| 651 | consumeToken(); |
| 652 | |
| 653 | SmallVector<BlockContentComment *, 8> Blocks; |
| 654 | while (Tok.isNot(tok::eof)) { |
| 655 | Blocks.push_back(parseBlockContent()); |
| 656 | |
| 657 | // Skip extra newlines after paragraph end. |
| 658 | while (Tok.is(tok::newline)) |
| 659 | consumeToken(); |
| 660 | } |
| 661 | return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); |
| 662 | } |
| 663 | |
| 664 | } // end namespace comments |
| 665 | } // end namespace clang |