blob: 92ea7042ff16124e03e706aa20c90a5abc84e639 [file] [log] [blame]
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +00001//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentParser.h"
11#include "clang/AST/CommentSema.h"
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000012#include "clang/AST/CommentDiagnostic.h"
13#include "clang/Basic/SourceManager.h"
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000014#include "llvm/Support/ErrorHandling.h"
15
16namespace clang {
17namespace comments {
18
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000019Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
20 const SourceManager &SourceMgr, DiagnosticsEngine &Diags):
21 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000022 consumeToken();
23}
24
25ParamCommandComment *Parser::parseParamCommandArgs(
26 ParamCommandComment *PC,
27 TextTokenRetokenizer &Retokenizer) {
28 Token Arg;
29 // Check if argument looks like direction specification: [dir]
30 // e.g., [in], [out], [in,out]
31 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000032 PC = S.actOnParamCommandDirectionArg(PC,
33 Arg.getLocation(),
34 Arg.getEndLocation(),
35 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000036
37 if (Retokenizer.lexWord(Arg))
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000038 PC = S.actOnParamCommandParamNameArg(PC,
39 Arg.getLocation(),
40 Arg.getEndLocation(),
41 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000042
43 return PC;
44}
45
46BlockCommandComment *Parser::parseBlockCommandArgs(
47 BlockCommandComment *BC,
48 TextTokenRetokenizer &Retokenizer,
49 unsigned NumArgs) {
50 typedef BlockCommandComment::Argument Argument;
Dmitri Gribenko814e2192012-07-06 16:41:59 +000051 Argument *Args =
52 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000053 unsigned ParsedArgs = 0;
54 Token Arg;
55 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
56 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
57 Arg.getEndLocation()),
58 Arg.getText());
59 ParsedArgs++;
60 }
61
62 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
63}
64
65BlockCommandComment *Parser::parseBlockCommand() {
66 assert(Tok.is(tok::command));
67
68 ParamCommandComment *PC;
69 BlockCommandComment *BC;
70 bool IsParam = false;
71 unsigned NumArgs = 0;
72 if (S.isParamCommand(Tok.getCommandName())) {
73 IsParam = true;
74 PC = S.actOnParamCommandStart(Tok.getLocation(),
75 Tok.getEndLocation(),
76 Tok.getCommandName());
77 } else {
78 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
79 BC = S.actOnBlockCommandStart(Tok.getLocation(),
80 Tok.getEndLocation(),
81 Tok.getCommandName());
82 }
83 consumeToken();
84
85 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
86 // Block command ahead. We can't nest block commands, so pretend that this
87 // command has an empty argument.
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000088 ParagraphComment *PC = S.actOnParagraphComment(
89 ArrayRef<InlineContentComment *>());
90 return S.actOnBlockCommandFinish(BC, PC);
91 }
92
93 if (IsParam || NumArgs > 0) {
94 // In order to parse command arguments we need to retokenize a few
95 // following text tokens.
96 TextTokenRetokenizer Retokenizer(Allocator);
97 while (Tok.is(tok::text)) {
98 if (Retokenizer.addToken(Tok))
99 consumeToken();
100 }
101
102 if (IsParam)
103 PC = parseParamCommandArgs(PC, Retokenizer);
104 else
105 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
106
107 // Put back tokens we didn't use.
Dmitri Gribenkofd939162012-07-24 16:10:47 +0000108 SmallVector<Token, 16> TextToks;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000109 Token Text;
Dmitri Gribenkofd939162012-07-24 16:10:47 +0000110 while (Retokenizer.lexText(Text)) {
111 TextToks.push_back(Text);
112 }
113 putBack(TextToks);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000114 }
115
116 BlockContentComment *Block = parseParagraphOrBlockCommand();
117 // Since we have checked for a block command, we should have parsed a
118 // paragraph.
119 if (IsParam)
120 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
121 else
122 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
123}
124
125InlineCommandComment *Parser::parseInlineCommand() {
126 assert(Tok.is(tok::command));
127
128 const Token CommandTok = Tok;
129 consumeToken();
130
131 TextTokenRetokenizer Retokenizer(Allocator);
132 while (Tok.is(tok::text)) {
133 if (Retokenizer.addToken(Tok))
134 consumeToken();
135 }
136
137 Token ArgTok;
138 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
139
140 InlineCommandComment *IC;
141 if (ArgTokValid) {
142 IC = S.actOnInlineCommand(CommandTok.getLocation(),
143 CommandTok.getEndLocation(),
144 CommandTok.getCommandName(),
145 ArgTok.getLocation(),
146 ArgTok.getEndLocation(),
147 ArgTok.getText());
148 } else {
149 IC = S.actOnInlineCommand(CommandTok.getLocation(),
150 CommandTok.getEndLocation(),
151 CommandTok.getCommandName());
152 }
153
154 Token Text;
155 while (Retokenizer.lexText(Text))
156 putBack(Text);
157
158 return IC;
159}
160
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000161HTMLStartTagComment *Parser::parseHTMLStartTag() {
162 assert(Tok.is(tok::html_start_tag));
163 HTMLStartTagComment *HST =
164 S.actOnHTMLStartTagStart(Tok.getLocation(),
165 Tok.getHTMLTagStartName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000166 consumeToken();
167
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000168 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000169 while (true) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000170 switch (Tok.getKind()) {
171 case tok::html_ident: {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000172 Token Ident = Tok;
173 consumeToken();
174 if (Tok.isNot(tok::html_equals)) {
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000175 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
176 Ident.getHTMLIdent()));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000177 continue;
178 }
179 Token Equals = Tok;
180 consumeToken();
181 if (Tok.isNot(tok::html_quoted_string)) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000182 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000183 diag::warn_doc_html_start_tag_expected_quoted_string)
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000184 << SourceRange(Equals.getLocation());
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000185 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
186 Ident.getHTMLIdent()));
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000187 while (Tok.is(tok::html_equals) ||
188 Tok.is(tok::html_quoted_string))
189 consumeToken();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000190 continue;
191 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000192 Attrs.push_back(HTMLStartTagComment::Attribute(
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000193 Ident.getLocation(),
194 Ident.getHTMLIdent(),
195 Equals.getLocation(),
196 SourceRange(Tok.getLocation(),
197 Tok.getEndLocation()),
198 Tok.getHTMLQuotedString()));
199 consumeToken();
200 continue;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000201 }
202
203 case tok::html_greater:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000204 HST = S.actOnHTMLStartTagFinish(HST,
205 copyArray(llvm::makeArrayRef(Attrs)),
206 Tok.getLocation(),
207 /* IsSelfClosing = */ false);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000208 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000209 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000210
211 case tok::html_slash_greater:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000212 HST = S.actOnHTMLStartTagFinish(HST,
213 copyArray(llvm::makeArrayRef(Attrs)),
214 Tok.getLocation(),
215 /* IsSelfClosing = */ true);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000216 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000217 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000218
219 case tok::html_equals:
220 case tok::html_quoted_string:
221 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000222 diag::warn_doc_html_start_tag_expected_ident_or_greater);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000223 while (Tok.is(tok::html_equals) ||
224 Tok.is(tok::html_quoted_string))
225 consumeToken();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000226 if (Tok.is(tok::html_ident) ||
227 Tok.is(tok::html_greater) ||
228 Tok.is(tok::html_slash_greater))
229 continue;
230
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000231 return S.actOnHTMLStartTagFinish(HST,
232 copyArray(llvm::makeArrayRef(Attrs)),
233 SourceLocation(),
234 /* IsSelfClosing = */ false);
235
236 default:
237 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
238 HST = S.actOnHTMLStartTagFinish(HST,
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000239 copyArray(llvm::makeArrayRef(Attrs)),
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000240 SourceLocation(),
241 /* IsSelfClosing = */ false);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000242 bool StartLineInvalid;
243 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000244 HST->getLocation(),
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000245 &StartLineInvalid);
246 bool EndLineInvalid;
247 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
248 Tok.getLocation(),
249 &EndLineInvalid);
250 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
251 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000252 diag::warn_doc_html_start_tag_expected_ident_or_greater)
253 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000254 else {
255 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000256 diag::warn_doc_html_start_tag_expected_ident_or_greater);
257 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
258 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000259 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000260 return HST;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000261 }
262 }
263}
264
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000265HTMLEndTagComment *Parser::parseHTMLEndTag() {
266 assert(Tok.is(tok::html_end_tag));
267 Token TokEndTag = Tok;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000268 consumeToken();
269 SourceLocation Loc;
270 if (Tok.is(tok::html_greater)) {
271 Loc = Tok.getLocation();
272 consumeToken();
273 }
274
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000275 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
276 Loc,
277 TokEndTag.getHTMLTagEndName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000278}
279
280BlockContentComment *Parser::parseParagraphOrBlockCommand() {
281 SmallVector<InlineContentComment *, 8> Content;
282
283 while (true) {
284 switch (Tok.getKind()) {
285 case tok::verbatim_block_begin:
286 case tok::verbatim_line_name:
287 case tok::eof:
288 assert(Content.size() != 0);
289 break; // Block content or EOF ahead, finish this parapgaph.
290
291 case tok::command:
292 if (S.isBlockCommand(Tok.getCommandName())) {
293 if (Content.size() == 0)
294 return parseBlockCommand();
295 break; // Block command ahead, finish this parapgaph.
296 }
297 if (S.isInlineCommand(Tok.getCommandName())) {
298 Content.push_back(parseInlineCommand());
299 continue;
300 }
301
302 // Not a block command, not an inline command ==> an unknown command.
303 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
304 Tok.getEndLocation(),
305 Tok.getCommandName()));
306 consumeToken();
307 continue;
308
309 case tok::newline: {
310 consumeToken();
311 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
312 consumeToken();
313 break; // Two newlines -- end of paragraph.
314 }
315 if (Content.size() > 0)
316 Content.back()->addTrailingNewline();
317 continue;
318 }
319
320 // Don't deal with HTML tag soup now.
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000321 case tok::html_start_tag:
322 Content.push_back(parseHTMLStartTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000323 continue;
324
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000325 case tok::html_end_tag:
326 Content.push_back(parseHTMLEndTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000327 continue;
328
329 case tok::text:
330 Content.push_back(S.actOnText(Tok.getLocation(),
331 Tok.getEndLocation(),
332 Tok.getText()));
333 consumeToken();
334 continue;
335
336 case tok::verbatim_block_line:
337 case tok::verbatim_block_end:
338 case tok::verbatim_line_text:
339 case tok::html_ident:
340 case tok::html_equals:
341 case tok::html_quoted_string:
342 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000343 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000344 llvm_unreachable("should not see this token");
345 }
346 break;
347 }
348
349 return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
350}
351
352VerbatimBlockComment *Parser::parseVerbatimBlock() {
353 assert(Tok.is(tok::verbatim_block_begin));
354
355 VerbatimBlockComment *VB =
356 S.actOnVerbatimBlockStart(Tok.getLocation(),
357 Tok.getVerbatimBlockName());
358 consumeToken();
359
360 // Don't create an empty line if verbatim opening command is followed
361 // by a newline.
362 if (Tok.is(tok::newline))
363 consumeToken();
364
365 SmallVector<VerbatimBlockLineComment *, 8> Lines;
366 while (Tok.is(tok::verbatim_block_line) ||
367 Tok.is(tok::newline)) {
368 VerbatimBlockLineComment *Line;
369 if (Tok.is(tok::verbatim_block_line)) {
370 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
371 Tok.getVerbatimBlockText());
372 consumeToken();
373 if (Tok.is(tok::newline)) {
374 consumeToken();
375 }
376 } else {
377 // Empty line, just a tok::newline.
Dmitri Gribenko94572c32012-07-18 21:27:38 +0000378 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000379 consumeToken();
380 }
381 Lines.push_back(Line);
382 }
383
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000384 if (Tok.is(tok::verbatim_block_end)) {
385 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
386 Tok.getVerbatimBlockName(),
387 copyArray(llvm::makeArrayRef(Lines)));
388 consumeToken();
389 } else {
390 // Unterminated \\verbatim block
391 VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
392 copyArray(llvm::makeArrayRef(Lines)));
393 }
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000394
395 return VB;
396}
397
398VerbatimLineComment *Parser::parseVerbatimLine() {
399 assert(Tok.is(tok::verbatim_line_name));
400
401 Token NameTok = Tok;
402 consumeToken();
403
404 SourceLocation TextBegin;
405 StringRef Text;
406 // Next token might not be a tok::verbatim_line_text if verbatim line
407 // starting command comes just before a newline or comment end.
408 if (Tok.is(tok::verbatim_line_text)) {
409 TextBegin = Tok.getLocation();
410 Text = Tok.getVerbatimLineText();
411 } else {
412 TextBegin = NameTok.getEndLocation();
413 Text = "";
414 }
415
416 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
417 NameTok.getVerbatimLineName(),
418 TextBegin,
419 Text);
420 consumeToken();
421 return VL;
422}
423
424BlockContentComment *Parser::parseBlockContent() {
425 switch (Tok.getKind()) {
426 case tok::text:
427 case tok::command:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000428 case tok::html_start_tag:
429 case tok::html_end_tag:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000430 return parseParagraphOrBlockCommand();
431
432 case tok::verbatim_block_begin:
433 return parseVerbatimBlock();
434
435 case tok::verbatim_line_name:
436 return parseVerbatimLine();
437
438 case tok::eof:
439 case tok::newline:
440 case tok::verbatim_block_line:
441 case tok::verbatim_block_end:
442 case tok::verbatim_line_text:
443 case tok::html_ident:
444 case tok::html_equals:
445 case tok::html_quoted_string:
446 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000447 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000448 llvm_unreachable("should not see this token");
449 }
Matt Beaumont-Gay4d48b5c2012-07-06 21:13:09 +0000450 llvm_unreachable("bogus token kind");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000451}
452
453FullComment *Parser::parseFullComment() {
454 // Skip newlines at the beginning of the comment.
455 while (Tok.is(tok::newline))
456 consumeToken();
457
458 SmallVector<BlockContentComment *, 8> Blocks;
459 while (Tok.isNot(tok::eof)) {
460 Blocks.push_back(parseBlockContent());
461
462 // Skip extra newlines after paragraph end.
463 while (Tok.is(tok::newline))
464 consumeToken();
465 }
466 return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
467}
468
469} // end namespace comments
470} // end namespace clang