//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentParser.h"
11#include "clang/AST/CommentSema.h"
12#include "llvm/Support/ErrorHandling.h"
13
14namespace clang {
15namespace comments {
16
17Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator):
18 L(L), S(S), Allocator(Allocator) {
19 consumeToken();
20}
21
22ParamCommandComment *Parser::parseParamCommandArgs(
23 ParamCommandComment *PC,
24 TextTokenRetokenizer &Retokenizer) {
25 Token Arg;
26 // Check if argument looks like direction specification: [dir]
27 // e.g., [in], [out], [in,out]
28 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
29 PC = S.actOnParamCommandArg(PC,
30 Arg.getLocation(),
31 Arg.getEndLocation(),
32 Arg.getText(),
33 /* IsDirection = */ true);
34
35 if (Retokenizer.lexWord(Arg))
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000036 PC = S.actOnParamCommandArg(PC,
37 Arg.getLocation(),
38 Arg.getEndLocation(),
39 Arg.getText(),
40 /* IsDirection = */ false);
41
42 return PC;
43}
44
45BlockCommandComment *Parser::parseBlockCommandArgs(
46 BlockCommandComment *BC,
47 TextTokenRetokenizer &Retokenizer,
48 unsigned NumArgs) {
49 typedef BlockCommandComment::Argument Argument;
50 Argument *Args = new (Allocator) Argument[NumArgs];
51 unsigned ParsedArgs = 0;
52 Token Arg;
53 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
54 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
55 Arg.getEndLocation()),
56 Arg.getText());
57 ParsedArgs++;
58 }
59
60 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
61}
62
63BlockCommandComment *Parser::parseBlockCommand() {
64 assert(Tok.is(tok::command));
65
66 ParamCommandComment *PC;
67 BlockCommandComment *BC;
68 bool IsParam = false;
69 unsigned NumArgs = 0;
70 if (S.isParamCommand(Tok.getCommandName())) {
71 IsParam = true;
72 PC = S.actOnParamCommandStart(Tok.getLocation(),
73 Tok.getEndLocation(),
74 Tok.getCommandName());
75 } else {
76 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
77 BC = S.actOnBlockCommandStart(Tok.getLocation(),
78 Tok.getEndLocation(),
79 Tok.getCommandName());
80 }
81 consumeToken();
82
83 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
84 // Block command ahead. We can't nest block commands, so pretend that this
85 // command has an empty argument.
86 // TODO: Diag() Warn empty arg to block command
87 ParagraphComment *PC = S.actOnParagraphComment(
88 ArrayRef<InlineContentComment *>());
89 return S.actOnBlockCommandFinish(BC, PC);
90 }
91
92 if (IsParam || NumArgs > 0) {
93 // In order to parse command arguments we need to retokenize a few
94 // following text tokens.
95 TextTokenRetokenizer Retokenizer(Allocator);
96 while (Tok.is(tok::text)) {
97 if (Retokenizer.addToken(Tok))
98 consumeToken();
99 }
100
101 if (IsParam)
102 PC = parseParamCommandArgs(PC, Retokenizer);
103 else
104 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
105
106 // Put back tokens we didn't use.
107 Token Text;
108 while (Retokenizer.lexText(Text))
109 putBack(Text);
110 }
111
112 BlockContentComment *Block = parseParagraphOrBlockCommand();
113 // Since we have checked for a block command, we should have parsed a
114 // paragraph.
115 if (IsParam)
116 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
117 else
118 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
119}
120
121InlineCommandComment *Parser::parseInlineCommand() {
122 assert(Tok.is(tok::command));
123
124 const Token CommandTok = Tok;
125 consumeToken();
126
127 TextTokenRetokenizer Retokenizer(Allocator);
128 while (Tok.is(tok::text)) {
129 if (Retokenizer.addToken(Tok))
130 consumeToken();
131 }
132
133 Token ArgTok;
134 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
135
136 InlineCommandComment *IC;
137 if (ArgTokValid) {
138 IC = S.actOnInlineCommand(CommandTok.getLocation(),
139 CommandTok.getEndLocation(),
140 CommandTok.getCommandName(),
141 ArgTok.getLocation(),
142 ArgTok.getEndLocation(),
143 ArgTok.getText());
144 } else {
145 IC = S.actOnInlineCommand(CommandTok.getLocation(),
146 CommandTok.getEndLocation(),
147 CommandTok.getCommandName());
148 }
149
150 Token Text;
151 while (Retokenizer.lexText(Text))
152 putBack(Text);
153
154 return IC;
155}
156
157HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
158 assert(Tok.is(tok::html_tag_open));
159 HTMLOpenTagComment *HOT =
160 S.actOnHTMLOpenTagStart(Tok.getLocation(),
161 Tok.getHTMLTagOpenName());
162 consumeToken();
163
164 SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;
165 while (true) {
166 if (Tok.is(tok::html_ident)) {
167 Token Ident = Tok;
168 consumeToken();
169 if (Tok.isNot(tok::html_equals)) {
170 Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
171 Ident.getHTMLIdent()));
172 continue;
173 }
174 Token Equals = Tok;
175 consumeToken();
176 if (Tok.isNot(tok::html_quoted_string)) {
177 // TODO: Diag() expected quoted string
178 Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
179 Ident.getHTMLIdent()));
180 continue;
181 }
182 Attrs.push_back(HTMLOpenTagComment::Attribute(
183 Ident.getLocation(),
184 Ident.getHTMLIdent(),
185 Equals.getLocation(),
186 SourceRange(Tok.getLocation(),
187 Tok.getEndLocation()),
188 Tok.getHTMLQuotedString()));
189 consumeToken();
190 continue;
191 } else if (Tok.is(tok::html_greater)) {
192 HOT = S.actOnHTMLOpenTagFinish(HOT,
193 copyArray(llvm::makeArrayRef(Attrs)),
194 Tok.getLocation());
195 consumeToken();
196 return HOT;
197 } else if (Tok.is(tok::html_equals) ||
198 Tok.is(tok::html_quoted_string)) {
199 // TODO: Diag() Err expected ident
200 while (Tok.is(tok::html_equals) ||
201 Tok.is(tok::html_quoted_string))
202 consumeToken();
203 } else {
204 // Not a token from HTML open tag. Thus HTML tag prematurely ended.
205 // TODO: Diag() Err HTML tag prematurely ended
206 return S.actOnHTMLOpenTagFinish(HOT,
207 copyArray(llvm::makeArrayRef(Attrs)),
208 SourceLocation());
209 }
210 }
211}
212
213HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
214 assert(Tok.is(tok::html_tag_close));
215 Token TokTagOpen = Tok;
216 consumeToken();
217 SourceLocation Loc;
218 if (Tok.is(tok::html_greater)) {
219 Loc = Tok.getLocation();
220 consumeToken();
221 }
222
223 return S.actOnHTMLCloseTag(TokTagOpen.getLocation(),
224 Loc,
225 TokTagOpen.getHTMLTagCloseName());
226}
227
228BlockContentComment *Parser::parseParagraphOrBlockCommand() {
229 SmallVector<InlineContentComment *, 8> Content;
230
231 while (true) {
232 switch (Tok.getKind()) {
233 case tok::verbatim_block_begin:
234 case tok::verbatim_line_name:
235 case tok::eof:
236 assert(Content.size() != 0);
237 break; // Block content or EOF ahead, finish this parapgaph.
238
239 case tok::command:
240 if (S.isBlockCommand(Tok.getCommandName())) {
241 if (Content.size() == 0)
242 return parseBlockCommand();
243 break; // Block command ahead, finish this parapgaph.
244 }
245 if (S.isInlineCommand(Tok.getCommandName())) {
246 Content.push_back(parseInlineCommand());
247 continue;
248 }
249
250 // Not a block command, not an inline command ==> an unknown command.
251 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
252 Tok.getEndLocation(),
253 Tok.getCommandName()));
254 consumeToken();
255 continue;
256
257 case tok::newline: {
258 consumeToken();
259 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
260 consumeToken();
261 break; // Two newlines -- end of paragraph.
262 }
263 if (Content.size() > 0)
264 Content.back()->addTrailingNewline();
265 continue;
266 }
267
268 // Don't deal with HTML tag soup now.
269 case tok::html_tag_open:
270 Content.push_back(parseHTMLOpenTag());
271 continue;
272
273 case tok::html_tag_close:
274 Content.push_back(parseHTMLCloseTag());
275 continue;
276
277 case tok::text:
278 Content.push_back(S.actOnText(Tok.getLocation(),
279 Tok.getEndLocation(),
280 Tok.getText()));
281 consumeToken();
282 continue;
283
284 case tok::verbatim_block_line:
285 case tok::verbatim_block_end:
286 case tok::verbatim_line_text:
287 case tok::html_ident:
288 case tok::html_equals:
289 case tok::html_quoted_string:
290 case tok::html_greater:
291 llvm_unreachable("should not see this token");
292 }
293 break;
294 }
295
296 return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
297}
298
299VerbatimBlockComment *Parser::parseVerbatimBlock() {
300 assert(Tok.is(tok::verbatim_block_begin));
301
302 VerbatimBlockComment *VB =
303 S.actOnVerbatimBlockStart(Tok.getLocation(),
304 Tok.getVerbatimBlockName());
305 consumeToken();
306
307 // Don't create an empty line if verbatim opening command is followed
308 // by a newline.
309 if (Tok.is(tok::newline))
310 consumeToken();
311
312 SmallVector<VerbatimBlockLineComment *, 8> Lines;
313 while (Tok.is(tok::verbatim_block_line) ||
314 Tok.is(tok::newline)) {
315 VerbatimBlockLineComment *Line;
316 if (Tok.is(tok::verbatim_block_line)) {
317 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
318 Tok.getVerbatimBlockText());
319 consumeToken();
320 if (Tok.is(tok::newline)) {
321 consumeToken();
322 }
323 } else {
324 // Empty line, just a tok::newline.
325 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
326 "");
327 consumeToken();
328 }
329 Lines.push_back(Line);
330 }
331
332 assert(Tok.is(tok::verbatim_block_end));
333 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
334 Tok.getVerbatimBlockName(),
335 copyArray(llvm::makeArrayRef(Lines)));
336 consumeToken();
337
338 return VB;
339}
340
341VerbatimLineComment *Parser::parseVerbatimLine() {
342 assert(Tok.is(tok::verbatim_line_name));
343
344 Token NameTok = Tok;
345 consumeToken();
346
347 SourceLocation TextBegin;
348 StringRef Text;
349 // Next token might not be a tok::verbatim_line_text if verbatim line
350 // starting command comes just before a newline or comment end.
351 if (Tok.is(tok::verbatim_line_text)) {
352 TextBegin = Tok.getLocation();
353 Text = Tok.getVerbatimLineText();
354 } else {
355 TextBegin = NameTok.getEndLocation();
356 Text = "";
357 }
358
359 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
360 NameTok.getVerbatimLineName(),
361 TextBegin,
362 Text);
363 consumeToken();
364 return VL;
365}
366
367BlockContentComment *Parser::parseBlockContent() {
368 switch (Tok.getKind()) {
369 case tok::text:
370 case tok::command:
371 case tok::html_tag_open:
372 case tok::html_tag_close:
373 return parseParagraphOrBlockCommand();
374
375 case tok::verbatim_block_begin:
376 return parseVerbatimBlock();
377
378 case tok::verbatim_line_name:
379 return parseVerbatimLine();
380
381 case tok::eof:
382 case tok::newline:
383 case tok::verbatim_block_line:
384 case tok::verbatim_block_end:
385 case tok::verbatim_line_text:
386 case tok::html_ident:
387 case tok::html_equals:
388 case tok::html_quoted_string:
389 case tok::html_greater:
390 llvm_unreachable("should not see this token");
391 }
392}
393
394FullComment *Parser::parseFullComment() {
395 // Skip newlines at the beginning of the comment.
396 while (Tok.is(tok::newline))
397 consumeToken();
398
399 SmallVector<BlockContentComment *, 8> Blocks;
400 while (Tok.isNot(tok::eof)) {
401 Blocks.push_back(parseBlockContent());
402
403 // Skip extra newlines after paragraph end.
404 while (Tok.is(tok::newline))
405 consumeToken();
406 }
407 return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
408}
409
410} // end namespace comments
411} // end namespace clang
412
413