blob: 6b7e0ab49d40a297804304d70064e47df9851465 [file] [log] [blame]
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +00001//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentParser.h"
11#include "clang/AST/CommentSema.h"
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000012#include "clang/AST/CommentDiagnostic.h"
13#include "clang/Basic/SourceManager.h"
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000014#include "llvm/Support/ErrorHandling.h"
15
16namespace clang {
17namespace comments {
18
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000019Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
20 const SourceManager &SourceMgr, DiagnosticsEngine &Diags):
21 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000022 consumeToken();
23}
24
25ParamCommandComment *Parser::parseParamCommandArgs(
26 ParamCommandComment *PC,
27 TextTokenRetokenizer &Retokenizer) {
28 Token Arg;
29 // Check if argument looks like direction specification: [dir]
30 // e.g., [in], [out], [in,out]
31 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000032 PC = S.actOnParamCommandDirectionArg(PC,
33 Arg.getLocation(),
34 Arg.getEndLocation(),
35 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000036
37 if (Retokenizer.lexWord(Arg))
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000038 PC = S.actOnParamCommandParamNameArg(PC,
39 Arg.getLocation(),
40 Arg.getEndLocation(),
41 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000042
43 return PC;
44}
45
46BlockCommandComment *Parser::parseBlockCommandArgs(
47 BlockCommandComment *BC,
48 TextTokenRetokenizer &Retokenizer,
49 unsigned NumArgs) {
50 typedef BlockCommandComment::Argument Argument;
Dmitri Gribenko814e2192012-07-06 16:41:59 +000051 Argument *Args =
52 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000053 unsigned ParsedArgs = 0;
54 Token Arg;
55 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
56 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
57 Arg.getEndLocation()),
58 Arg.getText());
59 ParsedArgs++;
60 }
61
62 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
63}
64
65BlockCommandComment *Parser::parseBlockCommand() {
66 assert(Tok.is(tok::command));
67
68 ParamCommandComment *PC;
69 BlockCommandComment *BC;
70 bool IsParam = false;
71 unsigned NumArgs = 0;
72 if (S.isParamCommand(Tok.getCommandName())) {
73 IsParam = true;
74 PC = S.actOnParamCommandStart(Tok.getLocation(),
75 Tok.getEndLocation(),
76 Tok.getCommandName());
77 } else {
78 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
79 BC = S.actOnBlockCommandStart(Tok.getLocation(),
80 Tok.getEndLocation(),
81 Tok.getCommandName());
82 }
83 consumeToken();
84
85 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
86 // Block command ahead. We can't nest block commands, so pretend that this
87 // command has an empty argument.
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000088 ParagraphComment *PC = S.actOnParagraphComment(
89 ArrayRef<InlineContentComment *>());
90 return S.actOnBlockCommandFinish(BC, PC);
91 }
92
93 if (IsParam || NumArgs > 0) {
94 // In order to parse command arguments we need to retokenize a few
95 // following text tokens.
96 TextTokenRetokenizer Retokenizer(Allocator);
97 while (Tok.is(tok::text)) {
98 if (Retokenizer.addToken(Tok))
99 consumeToken();
100 }
101
102 if (IsParam)
103 PC = parseParamCommandArgs(PC, Retokenizer);
104 else
105 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
106
107 // Put back tokens we didn't use.
108 Token Text;
109 while (Retokenizer.lexText(Text))
110 putBack(Text);
111 }
112
113 BlockContentComment *Block = parseParagraphOrBlockCommand();
114 // Since we have checked for a block command, we should have parsed a
115 // paragraph.
116 if (IsParam)
117 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
118 else
119 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
120}
121
122InlineCommandComment *Parser::parseInlineCommand() {
123 assert(Tok.is(tok::command));
124
125 const Token CommandTok = Tok;
126 consumeToken();
127
128 TextTokenRetokenizer Retokenizer(Allocator);
129 while (Tok.is(tok::text)) {
130 if (Retokenizer.addToken(Tok))
131 consumeToken();
132 }
133
134 Token ArgTok;
135 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
136
137 InlineCommandComment *IC;
138 if (ArgTokValid) {
139 IC = S.actOnInlineCommand(CommandTok.getLocation(),
140 CommandTok.getEndLocation(),
141 CommandTok.getCommandName(),
142 ArgTok.getLocation(),
143 ArgTok.getEndLocation(),
144 ArgTok.getText());
145 } else {
146 IC = S.actOnInlineCommand(CommandTok.getLocation(),
147 CommandTok.getEndLocation(),
148 CommandTok.getCommandName());
149 }
150
151 Token Text;
152 while (Retokenizer.lexText(Text))
153 putBack(Text);
154
155 return IC;
156}
157
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000158HTMLStartTagComment *Parser::parseHTMLStartTag() {
159 assert(Tok.is(tok::html_start_tag));
160 HTMLStartTagComment *HST =
161 S.actOnHTMLStartTagStart(Tok.getLocation(),
162 Tok.getHTMLTagStartName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000163 consumeToken();
164
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000165 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000166 while (true) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000167 switch (Tok.getKind()) {
168 case tok::html_ident: {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000169 Token Ident = Tok;
170 consumeToken();
171 if (Tok.isNot(tok::html_equals)) {
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000172 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
173 Ident.getHTMLIdent()));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000174 continue;
175 }
176 Token Equals = Tok;
177 consumeToken();
178 if (Tok.isNot(tok::html_quoted_string)) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000179 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000180 diag::warn_doc_html_start_tag_expected_quoted_string)
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000181 << SourceRange(Equals.getLocation());
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000182 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
183 Ident.getHTMLIdent()));
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000184 while (Tok.is(tok::html_equals) ||
185 Tok.is(tok::html_quoted_string))
186 consumeToken();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000187 continue;
188 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000189 Attrs.push_back(HTMLStartTagComment::Attribute(
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000190 Ident.getLocation(),
191 Ident.getHTMLIdent(),
192 Equals.getLocation(),
193 SourceRange(Tok.getLocation(),
194 Tok.getEndLocation()),
195 Tok.getHTMLQuotedString()));
196 consumeToken();
197 continue;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000198 }
199
200 case tok::html_greater:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000201 HST = S.actOnHTMLStartTagFinish(HST,
202 copyArray(llvm::makeArrayRef(Attrs)),
203 Tok.getLocation(),
204 /* IsSelfClosing = */ false);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000205 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000206 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000207
208 case tok::html_slash_greater:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000209 HST = S.actOnHTMLStartTagFinish(HST,
210 copyArray(llvm::makeArrayRef(Attrs)),
211 Tok.getLocation(),
212 /* IsSelfClosing = */ true);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000213 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000214 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000215
216 case tok::html_equals:
217 case tok::html_quoted_string:
218 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000219 diag::warn_doc_html_start_tag_expected_ident_or_greater);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000220 while (Tok.is(tok::html_equals) ||
221 Tok.is(tok::html_quoted_string))
222 consumeToken();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000223 if (Tok.is(tok::html_ident) ||
224 Tok.is(tok::html_greater) ||
225 Tok.is(tok::html_slash_greater))
226 continue;
227
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000228 return S.actOnHTMLStartTagFinish(HST,
229 copyArray(llvm::makeArrayRef(Attrs)),
230 SourceLocation(),
231 /* IsSelfClosing = */ false);
232
233 default:
234 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
235 HST = S.actOnHTMLStartTagFinish(HST,
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000236 copyArray(llvm::makeArrayRef(Attrs)),
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000237 SourceLocation(),
238 /* IsSelfClosing = */ false);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000239 bool StartLineInvalid;
240 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000241 HST->getLocation(),
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000242 &StartLineInvalid);
243 bool EndLineInvalid;
244 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
245 Tok.getLocation(),
246 &EndLineInvalid);
247 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
248 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000249 diag::warn_doc_html_start_tag_expected_ident_or_greater)
250 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000251 else {
252 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000253 diag::warn_doc_html_start_tag_expected_ident_or_greater);
254 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
255 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000256 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000257 return HST;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000258 }
259 }
260}
261
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000262HTMLEndTagComment *Parser::parseHTMLEndTag() {
263 assert(Tok.is(tok::html_end_tag));
264 Token TokEndTag = Tok;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000265 consumeToken();
266 SourceLocation Loc;
267 if (Tok.is(tok::html_greater)) {
268 Loc = Tok.getLocation();
269 consumeToken();
270 }
271
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000272 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
273 Loc,
274 TokEndTag.getHTMLTagEndName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000275}
276
277BlockContentComment *Parser::parseParagraphOrBlockCommand() {
278 SmallVector<InlineContentComment *, 8> Content;
279
280 while (true) {
281 switch (Tok.getKind()) {
282 case tok::verbatim_block_begin:
283 case tok::verbatim_line_name:
284 case tok::eof:
285 assert(Content.size() != 0);
286 break; // Block content or EOF ahead, finish this parapgaph.
287
288 case tok::command:
289 if (S.isBlockCommand(Tok.getCommandName())) {
290 if (Content.size() == 0)
291 return parseBlockCommand();
292 break; // Block command ahead, finish this parapgaph.
293 }
294 if (S.isInlineCommand(Tok.getCommandName())) {
295 Content.push_back(parseInlineCommand());
296 continue;
297 }
298
299 // Not a block command, not an inline command ==> an unknown command.
300 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
301 Tok.getEndLocation(),
302 Tok.getCommandName()));
303 consumeToken();
304 continue;
305
306 case tok::newline: {
307 consumeToken();
308 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
309 consumeToken();
310 break; // Two newlines -- end of paragraph.
311 }
312 if (Content.size() > 0)
313 Content.back()->addTrailingNewline();
314 continue;
315 }
316
317 // Don't deal with HTML tag soup now.
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000318 case tok::html_start_tag:
319 Content.push_back(parseHTMLStartTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000320 continue;
321
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000322 case tok::html_end_tag:
323 Content.push_back(parseHTMLEndTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000324 continue;
325
326 case tok::text:
327 Content.push_back(S.actOnText(Tok.getLocation(),
328 Tok.getEndLocation(),
329 Tok.getText()));
330 consumeToken();
331 continue;
332
333 case tok::verbatim_block_line:
334 case tok::verbatim_block_end:
335 case tok::verbatim_line_text:
336 case tok::html_ident:
337 case tok::html_equals:
338 case tok::html_quoted_string:
339 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000340 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000341 llvm_unreachable("should not see this token");
342 }
343 break;
344 }
345
346 return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
347}
348
349VerbatimBlockComment *Parser::parseVerbatimBlock() {
350 assert(Tok.is(tok::verbatim_block_begin));
351
352 VerbatimBlockComment *VB =
353 S.actOnVerbatimBlockStart(Tok.getLocation(),
354 Tok.getVerbatimBlockName());
355 consumeToken();
356
357 // Don't create an empty line if verbatim opening command is followed
358 // by a newline.
359 if (Tok.is(tok::newline))
360 consumeToken();
361
362 SmallVector<VerbatimBlockLineComment *, 8> Lines;
363 while (Tok.is(tok::verbatim_block_line) ||
364 Tok.is(tok::newline)) {
365 VerbatimBlockLineComment *Line;
366 if (Tok.is(tok::verbatim_block_line)) {
367 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
368 Tok.getVerbatimBlockText());
369 consumeToken();
370 if (Tok.is(tok::newline)) {
371 consumeToken();
372 }
373 } else {
374 // Empty line, just a tok::newline.
Dmitri Gribenko94572c32012-07-18 21:27:38 +0000375 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000376 consumeToken();
377 }
378 Lines.push_back(Line);
379 }
380
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000381 if (Tok.is(tok::verbatim_block_end)) {
382 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
383 Tok.getVerbatimBlockName(),
384 copyArray(llvm::makeArrayRef(Lines)));
385 consumeToken();
386 } else {
387 // Unterminated \\verbatim block
388 VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
389 copyArray(llvm::makeArrayRef(Lines)));
390 }
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000391
392 return VB;
393}
394
395VerbatimLineComment *Parser::parseVerbatimLine() {
396 assert(Tok.is(tok::verbatim_line_name));
397
398 Token NameTok = Tok;
399 consumeToken();
400
401 SourceLocation TextBegin;
402 StringRef Text;
403 // Next token might not be a tok::verbatim_line_text if verbatim line
404 // starting command comes just before a newline or comment end.
405 if (Tok.is(tok::verbatim_line_text)) {
406 TextBegin = Tok.getLocation();
407 Text = Tok.getVerbatimLineText();
408 } else {
409 TextBegin = NameTok.getEndLocation();
410 Text = "";
411 }
412
413 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
414 NameTok.getVerbatimLineName(),
415 TextBegin,
416 Text);
417 consumeToken();
418 return VL;
419}
420
421BlockContentComment *Parser::parseBlockContent() {
422 switch (Tok.getKind()) {
423 case tok::text:
424 case tok::command:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000425 case tok::html_start_tag:
426 case tok::html_end_tag:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000427 return parseParagraphOrBlockCommand();
428
429 case tok::verbatim_block_begin:
430 return parseVerbatimBlock();
431
432 case tok::verbatim_line_name:
433 return parseVerbatimLine();
434
435 case tok::eof:
436 case tok::newline:
437 case tok::verbatim_block_line:
438 case tok::verbatim_block_end:
439 case tok::verbatim_line_text:
440 case tok::html_ident:
441 case tok::html_equals:
442 case tok::html_quoted_string:
443 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000444 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000445 llvm_unreachable("should not see this token");
446 }
Matt Beaumont-Gay4d48b5c2012-07-06 21:13:09 +0000447 llvm_unreachable("bogus token kind");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000448}
449
450FullComment *Parser::parseFullComment() {
451 // Skip newlines at the beginning of the comment.
452 while (Tok.is(tok::newline))
453 consumeToken();
454
455 SmallVector<BlockContentComment *, 8> Blocks;
456 while (Tok.isNot(tok::eof)) {
457 Blocks.push_back(parseBlockContent());
458
459 // Skip extra newlines after paragraph end.
460 while (Tok.is(tok::newline))
461 consumeToken();
462 }
463 return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
464}
465
466} // end namespace comments
467} // end namespace clang