blob: 5f45125bdcef736ee50c42267f423eb51f29df6b [file] [log] [blame]
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +00001//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentParser.h"
Dmitri Gribenkoaa580812012-08-09 00:03:17 +000011#include "clang/AST/CommentCommandTraits.h"
Chandler Carruth55fc8732012-12-04 09:13:33 +000012#include "clang/AST/CommentDiagnostic.h"
13#include "clang/AST/CommentSema.h"
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000014#include "clang/Basic/SourceManager.h"
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000015#include "llvm/Support/ErrorHandling.h"
16
17namespace clang {
18namespace comments {
19
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000020/// Re-lexes a sequence of tok::text tokens.
21class TextTokenRetokenizer {
22 llvm::BumpPtrAllocator &Allocator;
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000023 Parser &P;
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000024
25 /// This flag is set when there are no more tokens we can fetch from lexer.
26 bool NoMoreInterestingTokens;
27
28 /// Token buffer: tokens we have processed and lookahead.
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000029 SmallVector<Token, 16> Toks;
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000030
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000031 /// A position in \c Toks.
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000032 struct Position {
33 unsigned CurToken;
34 const char *BufferStart;
35 const char *BufferEnd;
36 const char *BufferPtr;
37 SourceLocation BufferStartLoc;
38 };
39
40 /// Current position in Toks.
41 Position Pos;
42
43 bool isEnd() const {
44 return Pos.CurToken >= Toks.size();
45 }
46
47 /// Sets up the buffer pointers to point to current token.
48 void setupBuffer() {
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000049 assert(!isEnd());
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000050 const Token &Tok = Toks[Pos.CurToken];
51
52 Pos.BufferStart = Tok.getText().begin();
53 Pos.BufferEnd = Tok.getText().end();
54 Pos.BufferPtr = Pos.BufferStart;
55 Pos.BufferStartLoc = Tok.getLocation();
56 }
57
58 SourceLocation getSourceLocation() const {
59 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
60 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
61 }
62
63 char peek() const {
64 assert(!isEnd());
65 assert(Pos.BufferPtr != Pos.BufferEnd);
66 return *Pos.BufferPtr;
67 }
68
69 void consumeChar() {
70 assert(!isEnd());
71 assert(Pos.BufferPtr != Pos.BufferEnd);
72 Pos.BufferPtr++;
73 if (Pos.BufferPtr == Pos.BufferEnd) {
74 Pos.CurToken++;
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000075 if (isEnd() && !addToken())
76 return;
77
78 assert(!isEnd());
79 setupBuffer();
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000080 }
81 }
82
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000083 /// Add a token.
84 /// Returns true on success, false if there are no interesting tokens to
85 /// fetch from lexer.
86 bool addToken() {
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000087 if (NoMoreInterestingTokens)
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000088 return false;
89
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000090 if (P.Tok.is(tok::newline)) {
91 // If we see a single newline token between text tokens, skip it.
92 Token Newline = P.Tok;
93 P.consumeToken();
94 if (P.Tok.isNot(tok::text)) {
95 P.putBack(Newline);
96 NoMoreInterestingTokens = true;
97 return false;
98 }
99 }
100 if (P.Tok.isNot(tok::text)) {
101 NoMoreInterestingTokens = true;
102 return false;
103 }
104
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000105 Toks.push_back(P.Tok);
106 P.consumeToken();
107 if (Toks.size() == 1)
108 setupBuffer();
109 return true;
110 }
111
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000112 static bool isWhitespace(char C) {
113 return C == ' ' || C == '\n' || C == '\r' ||
114 C == '\t' || C == '\f' || C == '\v';
115 }
116
117 void consumeWhitespace() {
118 while (!isEnd()) {
119 if (isWhitespace(peek()))
120 consumeChar();
121 else
122 break;
123 }
124 }
125
126 void formTokenWithChars(Token &Result,
127 SourceLocation Loc,
128 const char *TokBegin,
129 unsigned TokLength,
130 StringRef Text) {
131 Result.setLocation(Loc);
132 Result.setKind(tok::text);
133 Result.setLength(TokLength);
134#ifndef NDEBUG
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000135 Result.TextPtr = "<UNSET>";
136 Result.IntVal = 7;
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000137#endif
138 Result.setText(Text);
139 }
140
141public:
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000142 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
Dmitri Gribenko0c43a922012-07-24 18:23:31 +0000143 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000144 Pos.CurToken = 0;
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000145 addToken();
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000146 }
147
148 /// Extract a word -- sequence of non-whitespace characters.
149 bool lexWord(Token &Tok) {
150 if (isEnd())
151 return false;
152
153 Position SavedPos = Pos;
154
155 consumeWhitespace();
156 SmallString<32> WordText;
157 const char *WordBegin = Pos.BufferPtr;
158 SourceLocation Loc = getSourceLocation();
159 while (!isEnd()) {
160 const char C = peek();
161 if (!isWhitespace(C)) {
162 WordText.push_back(C);
163 consumeChar();
164 } else
165 break;
166 }
167 const unsigned Length = WordText.size();
168 if (Length == 0) {
169 Pos = SavedPos;
170 return false;
171 }
172
173 char *TextPtr = Allocator.Allocate<char>(Length + 1);
174
175 memcpy(TextPtr, WordText.c_str(), Length + 1);
176 StringRef Text = StringRef(TextPtr, Length);
177
Dmitri Gribenkoca57ccd2012-12-19 17:34:55 +0000178 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000179 return true;
180 }
181
182 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
183 if (isEnd())
184 return false;
185
186 Position SavedPos = Pos;
187
188 consumeWhitespace();
189 SmallString<32> WordText;
190 const char *WordBegin = Pos.BufferPtr;
191 SourceLocation Loc = getSourceLocation();
192 bool Error = false;
193 if (!isEnd()) {
194 const char C = peek();
195 if (C == OpenDelim) {
196 WordText.push_back(C);
197 consumeChar();
198 } else
199 Error = true;
200 }
201 char C = '\0';
202 while (!Error && !isEnd()) {
203 C = peek();
204 WordText.push_back(C);
205 consumeChar();
206 if (C == CloseDelim)
207 break;
208 }
209 if (!Error && C != CloseDelim)
210 Error = true;
211
212 if (Error) {
213 Pos = SavedPos;
214 return false;
215 }
216
217 const unsigned Length = WordText.size();
218 char *TextPtr = Allocator.Allocate<char>(Length + 1);
219
220 memcpy(TextPtr, WordText.c_str(), Length + 1);
221 StringRef Text = StringRef(TextPtr, Length);
222
223 formTokenWithChars(Tok, Loc, WordBegin,
224 Pos.BufferPtr - WordBegin, Text);
225 return true;
226 }
227
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000228 /// Put back tokens that we didn't consume.
229 void putBackLeftoverTokens() {
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000230 if (isEnd())
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000231 return;
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000232
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000233 bool HavePartialTok = false;
234 Token PartialTok;
235 if (Pos.BufferPtr != Pos.BufferStart) {
236 formTokenWithChars(PartialTok, getSourceLocation(),
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000237 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
238 StringRef(Pos.BufferPtr,
239 Pos.BufferEnd - Pos.BufferPtr));
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000240 HavePartialTok = true;
241 Pos.CurToken++;
242 }
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000243
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000244 P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
245 Pos.CurToken = Toks.size();
246
247 if (HavePartialTok)
248 P.putBack(PartialTok);
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000249 }
250};
251
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000252Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
Dmitri Gribenkoaa580812012-08-09 00:03:17 +0000253 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
254 const CommandTraits &Traits):
255 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
256 Traits(Traits) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000257 consumeToken();
258}
259
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000260void Parser::parseParamCommandArgs(ParamCommandComment *PC,
261 TextTokenRetokenizer &Retokenizer) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000262 Token Arg;
263 // Check if argument looks like direction specification: [dir]
264 // e.g., [in], [out], [in,out]
265 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000266 S.actOnParamCommandDirectionArg(PC,
267 Arg.getLocation(),
268 Arg.getEndLocation(),
269 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000270
271 if (Retokenizer.lexWord(Arg))
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000272 S.actOnParamCommandParamNameArg(PC,
273 Arg.getLocation(),
274 Arg.getEndLocation(),
275 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000276}
277
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000278void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
279 TextTokenRetokenizer &Retokenizer) {
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000280 Token Arg;
281 if (Retokenizer.lexWord(Arg))
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000282 S.actOnTParamCommandParamNameArg(TPC,
283 Arg.getLocation(),
284 Arg.getEndLocation(),
285 Arg.getText());
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000286}
287
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000288void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
289 TextTokenRetokenizer &Retokenizer,
290 unsigned NumArgs) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000291 typedef BlockCommandComment::Argument Argument;
Dmitri Gribenko814e2192012-07-06 16:41:59 +0000292 Argument *Args =
293 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000294 unsigned ParsedArgs = 0;
295 Token Arg;
296 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
297 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
298 Arg.getEndLocation()),
299 Arg.getText());
300 ParsedArgs++;
301 }
302
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000303 S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000304}
305
306BlockCommandComment *Parser::parseBlockCommand() {
307 assert(Tok.is(tok::command));
308
309 ParamCommandComment *PC;
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000310 TParamCommandComment *TPC;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000311 BlockCommandComment *BC;
312 bool IsParam = false;
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000313 bool IsTParam = false;
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000314 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
315 if (Info->IsParamCommand) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000316 IsParam = true;
317 PC = S.actOnParamCommandStart(Tok.getLocation(),
318 Tok.getEndLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000319 Tok.getCommandID());
Dmitri Gribenkoeb34db72012-12-19 17:17:09 +0000320 } else if (Info->IsTParamCommand) {
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000321 IsTParam = true;
322 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
323 Tok.getEndLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000324 Tok.getCommandID());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000325 } else {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000326 BC = S.actOnBlockCommandStart(Tok.getLocation(),
327 Tok.getEndLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000328 Tok.getCommandID());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000329 }
330 consumeToken();
331
Dmitri Gribenko10442562013-01-26 00:36:14 +0000332 if (isTokBlockCommand()) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000333 // Block command ahead. We can't nest block commands, so pretend that this
334 // command has an empty argument.
Dmitri Gribenkoe5deb792012-07-30 18:05:28 +0000335 ParagraphComment *Paragraph = S.actOnParagraphComment(
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000336 ArrayRef<InlineContentComment *>());
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000337 if (IsParam) {
Dmitri Gribenko8a903932012-08-06 23:48:44 +0000338 S.actOnParamCommandFinish(PC, Paragraph);
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000339 return PC;
Dmitri Gribenko8a903932012-08-06 23:48:44 +0000340 } else if (IsTParam) {
341 S.actOnTParamCommandFinish(TPC, Paragraph);
342 return TPC;
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000343 } else {
344 S.actOnBlockCommandFinish(BC, Paragraph);
345 return BC;
346 }
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000347 }
348
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000349 if (IsParam || IsTParam || Info->NumArgs > 0) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000350 // In order to parse command arguments we need to retokenize a few
351 // following text tokens.
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000352 TextTokenRetokenizer Retokenizer(Allocator, *this);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000353
354 if (IsParam)
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000355 parseParamCommandArgs(PC, Retokenizer);
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000356 else if (IsTParam)
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000357 parseTParamCommandArgs(TPC, Retokenizer);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000358 else
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000359 parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000360
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000361 Retokenizer.putBackLeftoverTokens();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000362 }
363
Dmitri Gribenko10442562013-01-26 00:36:14 +0000364 // If there's a block command ahead, we will attach an empty paragraph to
365 // this command.
366 bool EmptyParagraph = false;
367 if (isTokBlockCommand())
368 EmptyParagraph = true;
369 else if (Tok.is(tok::newline)) {
370 Token PrevTok = Tok;
371 consumeToken();
372 EmptyParagraph = isTokBlockCommand();
373 putBack(PrevTok);
374 }
375
376 ParagraphComment *Paragraph;
377 if (EmptyParagraph)
378 Paragraph = S.actOnParagraphComment(ArrayRef<InlineContentComment *>());
379 else {
380 BlockContentComment *Block = parseParagraphOrBlockCommand();
381 // Since we have checked for a block command, we should have parsed a
382 // paragraph.
383 Paragraph = cast<ParagraphComment>(Block);
384 }
385
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000386 if (IsParam) {
387 S.actOnParamCommandFinish(PC, Paragraph);
388 return PC;
389 } else if (IsTParam) {
390 S.actOnTParamCommandFinish(TPC, Paragraph);
391 return TPC;
392 } else {
393 S.actOnBlockCommandFinish(BC, Paragraph);
394 return BC;
395 }
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000396}
397
398InlineCommandComment *Parser::parseInlineCommand() {
399 assert(Tok.is(tok::command));
400
401 const Token CommandTok = Tok;
402 consumeToken();
403
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000404 TextTokenRetokenizer Retokenizer(Allocator, *this);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000405
406 Token ArgTok;
407 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
408
409 InlineCommandComment *IC;
410 if (ArgTokValid) {
411 IC = S.actOnInlineCommand(CommandTok.getLocation(),
412 CommandTok.getEndLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000413 CommandTok.getCommandID(),
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000414 ArgTok.getLocation(),
415 ArgTok.getEndLocation(),
416 ArgTok.getText());
417 } else {
418 IC = S.actOnInlineCommand(CommandTok.getLocation(),
419 CommandTok.getEndLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000420 CommandTok.getCommandID());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000421 }
422
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000423 Retokenizer.putBackLeftoverTokens();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000424
425 return IC;
426}
427
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000428HTMLStartTagComment *Parser::parseHTMLStartTag() {
429 assert(Tok.is(tok::html_start_tag));
430 HTMLStartTagComment *HST =
431 S.actOnHTMLStartTagStart(Tok.getLocation(),
432 Tok.getHTMLTagStartName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000433 consumeToken();
434
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000435 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000436 while (true) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000437 switch (Tok.getKind()) {
438 case tok::html_ident: {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000439 Token Ident = Tok;
440 consumeToken();
441 if (Tok.isNot(tok::html_equals)) {
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000442 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
443 Ident.getHTMLIdent()));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000444 continue;
445 }
446 Token Equals = Tok;
447 consumeToken();
448 if (Tok.isNot(tok::html_quoted_string)) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000449 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000450 diag::warn_doc_html_start_tag_expected_quoted_string)
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000451 << SourceRange(Equals.getLocation());
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000452 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
453 Ident.getHTMLIdent()));
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000454 while (Tok.is(tok::html_equals) ||
455 Tok.is(tok::html_quoted_string))
456 consumeToken();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000457 continue;
458 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000459 Attrs.push_back(HTMLStartTagComment::Attribute(
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000460 Ident.getLocation(),
461 Ident.getHTMLIdent(),
462 Equals.getLocation(),
463 SourceRange(Tok.getLocation(),
464 Tok.getEndLocation()),
465 Tok.getHTMLQuotedString()));
466 consumeToken();
467 continue;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000468 }
469
470 case tok::html_greater:
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000471 S.actOnHTMLStartTagFinish(HST,
472 S.copyArray(llvm::makeArrayRef(Attrs)),
473 Tok.getLocation(),
474 /* IsSelfClosing = */ false);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000475 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000476 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000477
478 case tok::html_slash_greater:
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000479 S.actOnHTMLStartTagFinish(HST,
480 S.copyArray(llvm::makeArrayRef(Attrs)),
481 Tok.getLocation(),
482 /* IsSelfClosing = */ true);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000483 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000484 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000485
486 case tok::html_equals:
487 case tok::html_quoted_string:
488 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000489 diag::warn_doc_html_start_tag_expected_ident_or_greater);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000490 while (Tok.is(tok::html_equals) ||
491 Tok.is(tok::html_quoted_string))
492 consumeToken();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000493 if (Tok.is(tok::html_ident) ||
494 Tok.is(tok::html_greater) ||
495 Tok.is(tok::html_slash_greater))
496 continue;
497
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000498 S.actOnHTMLStartTagFinish(HST,
499 S.copyArray(llvm::makeArrayRef(Attrs)),
500 SourceLocation(),
501 /* IsSelfClosing = */ false);
502 return HST;
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000503
504 default:
505 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000506 S.actOnHTMLStartTagFinish(HST,
507 S.copyArray(llvm::makeArrayRef(Attrs)),
508 SourceLocation(),
509 /* IsSelfClosing = */ false);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000510 bool StartLineInvalid;
511 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000512 HST->getLocation(),
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000513 &StartLineInvalid);
514 bool EndLineInvalid;
515 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
516 Tok.getLocation(),
517 &EndLineInvalid);
518 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
519 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000520 diag::warn_doc_html_start_tag_expected_ident_or_greater)
521 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000522 else {
523 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000524 diag::warn_doc_html_start_tag_expected_ident_or_greater);
525 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
526 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000527 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000528 return HST;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000529 }
530 }
531}
532
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000533HTMLEndTagComment *Parser::parseHTMLEndTag() {
534 assert(Tok.is(tok::html_end_tag));
535 Token TokEndTag = Tok;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000536 consumeToken();
537 SourceLocation Loc;
538 if (Tok.is(tok::html_greater)) {
539 Loc = Tok.getLocation();
540 consumeToken();
541 }
542
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000543 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
544 Loc,
545 TokEndTag.getHTMLTagEndName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000546}
547
548BlockContentComment *Parser::parseParagraphOrBlockCommand() {
549 SmallVector<InlineContentComment *, 8> Content;
550
551 while (true) {
552 switch (Tok.getKind()) {
553 case tok::verbatim_block_begin:
554 case tok::verbatim_line_name:
555 case tok::eof:
556 assert(Content.size() != 0);
557 break; // Block content or EOF ahead, finish this parapgaph.
558
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000559 case tok::unknown_command:
560 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
561 Tok.getEndLocation(),
562 Tok.getUnknownCommandName()));
563 consumeToken();
564 continue;
565
566 case tok::command: {
567 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
568 if (Info->IsBlockCommand) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000569 if (Content.size() == 0)
570 return parseBlockCommand();
571 break; // Block command ahead, finish this parapgaph.
572 }
Dmitri Gribenko36cbbe92012-11-18 00:30:31 +0000573 if (Info->IsVerbatimBlockEndCommand) {
574 Diag(Tok.getLocation(),
575 diag::warn_verbatim_block_end_without_start)
576 << Info->Name
577 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
578 consumeToken();
579 continue;
580 }
Dmitri Gribenkob0b8a962012-09-11 19:22:03 +0000581 if (Info->IsUnknownCommand) {
582 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
583 Tok.getEndLocation(),
584 Info->getID()));
585 consumeToken();
586 continue;
587 }
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000588 assert(Info->IsInlineCommand);
589 Content.push_back(parseInlineCommand());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000590 continue;
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000591 }
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000592
593 case tok::newline: {
594 consumeToken();
595 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
596 consumeToken();
597 break; // Two newlines -- end of paragraph.
598 }
599 if (Content.size() > 0)
600 Content.back()->addTrailingNewline();
601 continue;
602 }
603
604 // Don't deal with HTML tag soup now.
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000605 case tok::html_start_tag:
606 Content.push_back(parseHTMLStartTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000607 continue;
608
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000609 case tok::html_end_tag:
610 Content.push_back(parseHTMLEndTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000611 continue;
612
613 case tok::text:
614 Content.push_back(S.actOnText(Tok.getLocation(),
615 Tok.getEndLocation(),
616 Tok.getText()));
617 consumeToken();
618 continue;
619
620 case tok::verbatim_block_line:
621 case tok::verbatim_block_end:
622 case tok::verbatim_line_text:
623 case tok::html_ident:
624 case tok::html_equals:
625 case tok::html_quoted_string:
626 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000627 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000628 llvm_unreachable("should not see this token");
629 }
630 break;
631 }
632
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000633 return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000634}
635
636VerbatimBlockComment *Parser::parseVerbatimBlock() {
637 assert(Tok.is(tok::verbatim_block_begin));
638
639 VerbatimBlockComment *VB =
640 S.actOnVerbatimBlockStart(Tok.getLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000641 Tok.getVerbatimBlockID());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000642 consumeToken();
643
644 // Don't create an empty line if verbatim opening command is followed
645 // by a newline.
646 if (Tok.is(tok::newline))
647 consumeToken();
648
649 SmallVector<VerbatimBlockLineComment *, 8> Lines;
650 while (Tok.is(tok::verbatim_block_line) ||
651 Tok.is(tok::newline)) {
652 VerbatimBlockLineComment *Line;
653 if (Tok.is(tok::verbatim_block_line)) {
654 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
655 Tok.getVerbatimBlockText());
656 consumeToken();
657 if (Tok.is(tok::newline)) {
658 consumeToken();
659 }
660 } else {
661 // Empty line, just a tok::newline.
Dmitri Gribenko94572c32012-07-18 21:27:38 +0000662 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000663 consumeToken();
664 }
665 Lines.push_back(Line);
666 }
667
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000668 if (Tok.is(tok::verbatim_block_end)) {
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000669 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000670 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000671 Info->Name,
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000672 S.copyArray(llvm::makeArrayRef(Lines)));
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000673 consumeToken();
674 } else {
675 // Unterminated \\verbatim block
Dmitri Gribenko7d9b5112012-08-06 19:03:12 +0000676 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
677 S.copyArray(llvm::makeArrayRef(Lines)));
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000678 }
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000679
680 return VB;
681}
682
683VerbatimLineComment *Parser::parseVerbatimLine() {
684 assert(Tok.is(tok::verbatim_line_name));
685
686 Token NameTok = Tok;
687 consumeToken();
688
689 SourceLocation TextBegin;
690 StringRef Text;
691 // Next token might not be a tok::verbatim_line_text if verbatim line
692 // starting command comes just before a newline or comment end.
693 if (Tok.is(tok::verbatim_line_text)) {
694 TextBegin = Tok.getLocation();
695 Text = Tok.getVerbatimLineText();
696 } else {
697 TextBegin = NameTok.getEndLocation();
698 Text = "";
699 }
700
701 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000702 NameTok.getVerbatimLineID(),
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000703 TextBegin,
704 Text);
705 consumeToken();
706 return VL;
707}
708
709BlockContentComment *Parser::parseBlockContent() {
710 switch (Tok.getKind()) {
711 case tok::text:
Dmitri Gribenkoe4330a32012-09-10 20:32:42 +0000712 case tok::unknown_command:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000713 case tok::command:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000714 case tok::html_start_tag:
715 case tok::html_end_tag:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000716 return parseParagraphOrBlockCommand();
717
718 case tok::verbatim_block_begin:
719 return parseVerbatimBlock();
720
721 case tok::verbatim_line_name:
722 return parseVerbatimLine();
723
724 case tok::eof:
725 case tok::newline:
726 case tok::verbatim_block_line:
727 case tok::verbatim_block_end:
728 case tok::verbatim_line_text:
729 case tok::html_ident:
730 case tok::html_equals:
731 case tok::html_quoted_string:
732 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000733 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000734 llvm_unreachable("should not see this token");
735 }
Matt Beaumont-Gay4d48b5c2012-07-06 21:13:09 +0000736 llvm_unreachable("bogus token kind");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000737}
738
739FullComment *Parser::parseFullComment() {
740 // Skip newlines at the beginning of the comment.
741 while (Tok.is(tok::newline))
742 consumeToken();
743
744 SmallVector<BlockContentComment *, 8> Blocks;
745 while (Tok.isNot(tok::eof)) {
746 Blocks.push_back(parseBlockContent());
747
748 // Skip extra newlines after paragraph end.
749 while (Tok.is(tok::newline))
750 consumeToken();
751 }
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000752 return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000753}
754
755} // end namespace comments
756} // end namespace clang